# Site Indexer
#
# Workflow file for run #1326 of the Site Indexer workflow.

# Site Indexer: refreshes the site's search index.
name: Site Indexer
on:
  # Scheduled run every day at 04:30 (GitHub evaluates cron expressions in UTC).
  schedule:
    - cron: '30 4 * * *'
  # Manual run; the optional `force` input bypasses the staleness check when
  # it is set to the string "true".
  workflow_dispatch:
    inputs:
      force:
        description: 'Enter true to force the indexer to run.'
        required: false
jobs:
  # Gate job: scheduled runs proceed only when the repository owner is
  # `asciidoctor`; manually dispatched runs always proceed. `build` depends on
  # this job, so it is skipped whenever this job is skipped.
  activate:
    runs-on: ubuntu-latest
    if: |
      (github.event_name == 'schedule' && github.repository_owner == 'asciidoctor') ||
      github.event_name == 'workflow_dispatch'
    steps:
      - run: echo ok go
  # Runs the indexer inside the DocSearch scraper container, but only when the
  # index is stale or the run was forced.
  build:
    needs: activate
    runs-on: ubuntu-latest
    container: algolia/docsearch-scraper:v1.12.0
    steps:
      - name: Checkout
        uses: actions/checkout@v3
      - name: Index
        env:
          APPLICATION_ID: ${{ secrets.ALGOLIA_APP_ID }}
          API_KEY: ${{ secrets.ALGOLIA_API_KEY }}
          CONFIG: ${{ github.workspace }}/docsearch/config.json
          # Passed through the environment instead of being interpolated into
          # the script text, so the input value is never parsed as shell code.
          FORCE: ${{ github.event.inputs.force }}
        run: |
          # Download the published scraper configuration; -f makes HTTP errors
          # fail the step instead of saving an error page as the config.
          mkdir -p "$(dirname "$CONFIG")"
          curl -fsSL -o "$CONFIG" https://docs.asciidoctor.org/docsearch-config.json
          INDEX_NAME=$(node -p "JSON.parse(require('fs').readFileSync('$CONFIG')).index_name")
          SITEMAP_URL=$(node -p "JSON.parse(require('fs').readFileSync('$CONFIG')).sitemap_urls[0].replace('.xml', '-ROOT.xml')")
          # only run indexer if index is more than 2 days out of date
          # (172800000 ms = 2 days)
          STALENESS_THRESHOLD=172800000
          # First <lastmod> of the ROOT sitemap, in epoch milliseconds, minus
          # the staleness threshold.
          SITE_LAST_MODIFIED=$(
            curl -fsS "$SITEMAP_URL" |
              awk '/<lastmod>/{gsub(/<\/?lastmod>/,"");print;exit}' |
              node -p "Date.parse(require('fs').readFileSync(0).toString().trim()) - $STALENESS_THRESHOLD"
          )
          # `updatedAt` of the index named in the config, in epoch milliseconds.
          SITE_LAST_INDEXED=$(
            curl -fsS \
              -H "X-Algolia-Application-Id: $APPLICATION_ID" \
              -H "X-Algolia-API-Key: $API_KEY" \
              "https://${APPLICATION_ID}-dsn.algolia.net/1/indexes" |
              node -p "Date.parse(JSON.parse(require('fs').readFileSync(0).toString()).items.find((it) => it.name === '$INDEX_NAME').updatedAt)"
          )
          if [ "$FORCE" != "true" ]; then
            # A failed fetch or a missing date yields NaN or an empty value.
            # Fail loudly rather than letting the numeric comparison error out
            # and fall through to the SKIP branch.
            for ts in "$SITE_LAST_MODIFIED" "$SITE_LAST_INDEXED"; do
              case "$ts" in
                '' | *[!0-9]*)
                  echo "ERROR: Could not determine timestamps (modified='$SITE_LAST_MODIFIED', indexed='$SITE_LAST_INDEXED')." >&2
                  exit 1
                  ;;
              esac
            done
          fi
          if [ "$FORCE" = "true" ] || [ "$SITE_LAST_MODIFIED" -gt "$SITE_LAST_INDEXED" ]; then
            # NOTE(review): the scraper is run from /root with HOME pointing
            # there; presumably that is where the image installs it - confirm.
            export HOME=/root
            export INDEX_NAME_TMP="${INDEX_NAME}_${GITHUB_RUN_ID}"
            cd "$HOME"
            pipenv run python -m src.index
          else
            echo 'SKIP: Index is already up to date.'
          fi