GenBank fetch and ingest #797
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: GenBank fetch and ingest

on:
  schedule:
    # Note that times are in UTC, which is 1 or 2 hours behind CET depending on daylight saving time.
    #
    # Currently, we aim to trigger ingest every day at 18:07 UTC, which is 19:07 CET (as of Mar 2022).
    # Note that the actual runs might be late. As of right now, the action starts around 20 past the hour.
    # Numerous people were confused about that, including me:
    #  - https://github.community/t/scheduled-action-running-consistently-late/138025/11
    #  - https://github.com/github/docs/issues/3059
    #
    # Note, '*' is a special character in YAML, so you have to quote this string.
    #
    # Docs:
    #  - https://docs.github.com/en/actions/learn-github-actions/events-that-trigger-workflows#schedule
    #
    # Tool that deciphers this particular format of crontab string:
    #  - https://crontab.guru/
    #
    # Looks like you are about to modify this schedule? Make sure you also modify the schedule for the
    # sister GISAID job, so that we don't need to keep two schedules in our heads.
    - cron: '7 18 * * *'

  # Manually triggered using `./vendored/trigger nextstrain/ncov-ingest genbank/fetch-and-ingest`
  # (or `fetch-and-ingest`, which includes GISAID)
  repository_dispatch:
    types:
      - genbank/fetch-and-ingest
      - fetch-and-ingest

  # Manually triggered using GitHub's UI
  workflow_dispatch:

jobs:
  # Submits the GenBank ingest Snakemake workflow to AWS Batch through the
  # Nextstrain CLI. Because of `--detach`, the run_pipeline step is not expected
  # to wait for the remote build, so the failure notification below only covers
  # failures of this GitHub job itself (e.g. job submission) — TODO confirm.
  fetch-and-ingest:
    runs-on: ubuntu-latest
    env:
      # Job-level variables are visible to every step, including the failure notifier.
      GITHUB_RUN_ID: ${{ github.run_id }}
      SLACK_CHANNELS: ncov-genbank-updates
      SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}
    steps:
      # v4 runs on Node 20; the Node 16 runtime used by v3 is deprecated.
      - uses: actions/checkout@v4

      - uses: nextstrain/.github/actions/setup-nextstrain-cli@master

      - name: run_pipeline
        run: |
          # Record the variables the remote job needs in env.d; they are handed
          # to snakemake through `envdir env.d` below. The AWS access key pair is
          # deliberately not listed here, so it stays local to this step.
          ./bin/write-envdir env.d \
            AWS_DEFAULT_REGION \
            GITHUB_RUN_ID \
            SLACK_TOKEN \
            SLACK_CHANNELS \
            PAT_GITHUB_DISPATCH

          # Snakemake config overrides, passed as key=value pairs via --config.
          declare -a config
          config+=(
            fetch_from_database=True
            trigger_rebuild=True
            trigger_counts=True
          )

          # Everything after the build directory (`.`) is the command line that
          # `--exec env` runs inside the container.
          nextstrain build \
            --aws-batch \
            --detach \
            --no-download \
            --image nextstrain/ncov-ingest \
            --cpus 16 \
            --memory 68GiB \
            --exec env \
            . \
              envdir env.d snakemake \
                --configfile config/genbank.yaml \
                --config "${config[@]}" \
                --cores 16 \
                --resources mem_mb=68000 \
                --printshellcmds
        env:
          AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }}
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          PAT_GITHUB_DISPATCH: ${{ secrets.GH_TOKEN_NEXTSTRAIN_BOT_WORKFLOW_DISPATCH }}

      # Runs only when an earlier step in this job failed. Presumably posts to
      # Slack using the job-level SLACK_TOKEN / SLACK_CHANNELS — confirm against
      # vendored/notify-on-job-fail.
      - name: notify_pipeline_failed
        if: ${{ failure() }}
        run: ./vendored/notify-on-job-fail Ingest nextstrain/ncov-ingest