Merge pull request #6 from goodmami/add-release-action

Fix #3: add release script and GitHub action
globalwordnet · Dec 15, 2020 · 1276aad · 1276aad
2 parents 1720c12 + abcb9ba
commit 1276aad
Show file tree

Hide file tree

Showing 2 changed files with 72 additions and 0 deletions.
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -0,0 +1,27 @@
+name: Upload Assets  # build the CILI TSV and attach it to a newly created release
+
+on:
+  release:
+    types: [created]  # run when a release is created
+
+jobs:
+  run:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v2  # check out the repository so make-tsv.py is available to later steps
+    - name: Set up Python 3.8
+      uses: actions/setup-python@v2
+      with:
+        python-version: '3.8'
+    - name: Install dependencies  # rdflib 5.x is the library make-tsv.py imports
+      run: python3.8 -m pip install "rdflib~=5.0"
+    - name: Build Assets  # redirect the script's output to cili.tsv, then xz-compress it into cili.tsv.xz
+      run: |
+        python3.8 make-tsv.py > cili.tsv
+        xz -z cili.tsv
+    - name: Upload  # release tag = last '/'-separated segment of GITHUB_REF; text after '#' is the asset's display label
+      run: |
+        gh release upload "${GITHUB_REF##*/}" "cili.tsv.xz#CILI inventory and definitions"
+      env:
+        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}  # token the gh CLI uses to authenticate
+
diff --git a/make-tsv.py b/make-tsv.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python3
+
+"""
+Script to produce a TSV file for a release of CILI.
+
+The mappings to the Princeton WordNet generally don't need to be
+released regularly as they are unlikely to change and are already
+included in WN-LMF releases of the PWN, so this script reduces the
+ili.ttl file to a two-column tab-separated-value file containing only
+the ILI inventory and their definitions. This assumes that every ILI
+has a definition, which is true by design. The resulting .tsv file is
+less than half the size of the .ttl file when uncompressed, but
+roughly the same size when compressed. TSV is generally much faster to
+parse, however, and doesn't require an RDF library, so it is more
+appealing for downstream applications.
+
+Requirements:
+    - Python 3.6+
+    - rdflib
+Usage:
+    python3 make-tsv.py > cili.tsv
+
+"""
+
+import sys
+
+from rdflib import Graph
+from rdflib.namespace import SKOS
+
+
+g = Graph()
+g.parse("ili.ttl", format='ttl')
+
+# pair each ILI (ignoring the URL part) with its definition
+data = [(subj.rpartition('/')[2], obj)
+        for subj, obj
+        in g.subject_objects(predicate=SKOS.definition)]
+
+# sort by ILI number
+data.sort(key=lambda pair: int(pair[0].lstrip('i')))
+
+print('ILI\tDefinition')
+for ili, definition in data:
+    print(f'{ili}\t{definition}')
+