# Regenerate DB #131
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Rebuilds the per-language database from the latest Wikimedia dump, copies the
# result to the `wiki-dbs` bucket and attaches it to a GitHub release tagged
# with the dump's update date.
name: Regenerate DB

on:
  workflow_dispatch:
  schedule:
    # Run on days 5/24 of each month (dumps start on days 1/20 and take a few
    # days to complete and get mirrored).
    - cron: '0 4 5,24 * *'

jobs:
  create-updated-wiki-dump:
    name: ${{ matrix.lang }}wiki Dump Gen
    runs-on: ubuntu-latest
    # The release step creates/updates a release and uploads an asset, which
    # needs write access to repository contents.
    permissions:
      contents: write
    defaults:
      run:
        # Naming bash explicitly makes GitHub run scripts with `-eo pipefail`;
        # the implicit default shell omits pipefail, so a failing `curl` or
        # `./gen` in the middle of a pipeline would otherwise go unnoticed.
        shell: bash
    strategy:
      # One language failing must not cancel the others.
      fail-fast: false
      matrix:
        lang: [en, fr, de, ja, it, ru, es, zh, pl, nl, pt, ar, fi, hu, th, he, tr, sv]
    steps:
      - uses: actions/checkout@v4

      - name: Prerequisites
        id: setup
        run: |
          # Refresh the package index first; installing against a stale index
          # can fail with 404s on hosted runners.
          sudo apt-get update
          sudo apt-get install -y jq xz-utils libxml2-dev libgc-dev
          # Date part (text before the first space) of the pagelinkstable
          # job's `updated` field; used later as the release tag.
          date=$(curl -fsSL https://dumps.wikimedia.org/index.json |
            jq --raw-output '.wikis.${{ matrix.lang }}wiki.jobs.pagelinkstable.updated | split(" ") | .[0]')
          if [ -z "$date" ]; then
            echo "Error: date is empty"
            exit 1
          fi
          echo "date=$date" >> "$GITHUB_OUTPUT"

      - name: Initialize swapfile
        # Adds 8 GiB of swap (presumably headroom for the generator; confirm
        # against db_gen's memory use).
        run: |
          sudo fallocate -l 8G /swapfile
          sudo chmod 600 /swapfile
          sudo mkswap /swapfile
          sudo swapon /swapfile
          sudo swapon --show

      - name: Build
        run: |
          cd db_gen
          clang++ -Ofast -flto -march=native -mtune=native -I/usr/include/libxml2 db_gen.cc -o ../gen -std=c++17 -lxml2 -lgc
          cd ..
          # Prefer the recombined articles dump when the wiki has one; take
          # the URL of the first file listed for that job.
          dump_path=$(curl -fsSL https://dumps.wikimedia.org/index.json |
            jq --raw-output '.wikis.${{ matrix.lang }}wiki.jobs | if has("articlesdumprecombine") then .articlesdumprecombine else .articlesdump end | .files | to_entries[0].value.url')
          # Stream download -> decompress -> generate -> compress. `--fail`
          # turns HTTP errors into a non-zero exit instead of piping an error
          # page into bzcat.
          # NOTE(review): with pipefail this step fails if ./gen stops reading
          # stdin before EOF -- confirm db_gen consumes its whole input.
          curl -fL "https://dumps.wikimedia.org/$dump_path" | bzcat | ./gen |
            xz -C crc32 --lzma2=preset=9e,lc=4,pb=2 -f - > "${{ matrix.lang }}.bin"
          # if size of file is less than 1KB, something went wrong
          if [ "$(stat -c %s "${{ matrix.lang }}.bin")" -lt 1024 ]; then
            echo "File is too small, something went wrong"
            exit 1
          fi

      - name: Upload to bucket
        # Credentials are passed through the environment variables the AWS CLI
        # reads natively, so secret values are never spliced into the script
        # text or written to ~/.aws on the runner.
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          AWS_DEFAULT_REGION: auto
          AWS_DEFAULT_OUTPUT: json
          R2_AWS_ENDPOINT_URL: ${{ secrets.R2_AWS_ENDPOINT_URL }}
        run: |
          aws s3 cp "${{ matrix.lang }}.bin" "s3://wiki-dbs/${{ matrix.lang }}.bin" --endpoint-url "$R2_AWS_ENDPOINT_URL"

      - name: Upload
        uses: softprops/action-gh-release@v2
        with:
          files: ${{ matrix.lang }}.bin
          tag_name: ${{ steps.setup.outputs.date }}