diff --git a/pkgs/development/libraries/languagemachines/frog.nix b/pkgs/development/libraries/languagemachines/frog.nix
new file mode 100644
index 0000000000000..c80c28eb14bc1
--- /dev/null
+++ b/pkgs/development/libraries/languagemachines/frog.nix
@@ -0,0 +1,64 @@
+{ stdenv, fetchurl
+, automake, autoconf, libtool, pkgconfig, autoconf-archive
+, libxml2, icu
+, languageMachines
+}:
+
+# Frog, built from the release pinned in
+# release-info/LanguageMachines-frog.json.
+
+let
+  release = builtins.fromJSON (builtins.readFile ./release-info/LanguageMachines-frog.json);
+  # release.version carries a leading "v" (see the release-info JSON); strip it
+  # so the derivation name ends in a version that Nix tooling can parse.
+  version = stdenv.lib.removePrefix "v" release.version;
+in
+
+stdenv.mkDerivation {
+  name = "frog-${version}";
+  inherit version;
+  src = fetchurl { inherit (release) url sha256;
+                   name = "frog-${release.version}.tar.gz"; };
+  buildInputs = [ automake autoconf libtool pkgconfig autoconf-archive
+                  libxml2 icu
+                  languageMachines.ticcutils
+                  languageMachines.timbl
+                  languageMachines.mbt
+                  languageMachines.libfolia
+                  languageMachines.ucto
+                  languageMachines.frogdata
+                ];
+
+  preConfigure = ''
+    sh bootstrap.sh
+  '';
+  postInstall = ''
+    # frog expects the data files installed in the same prefix
+    mkdir -p $out/share/frog/;
+    for f in ${languageMachines.frogdata}/share/frog/*; do
+      ln -s $f $out/share/frog/;
+    done;
+  '';
+
+  # The test suite still runs after installation, as it did when it lived in
+  # postInstall (presumably because it needs the data files linked above).
+  doInstallCheck = true;
+  installCheckPhase = ''
+    make check
+  '';
+
+  meta = with stdenv.lib; {
+    description = "A Tagger-Lemmatizer-Morphological-Analyzer-Dependency-Parser for Dutch";
+    homepage = https://languagemachines.github.io/frog;
+    license = licenses.gpl3;
+    platforms = platforms.all;
+    maintainers = with maintainers; [ roberth ];
+
+    longDescription = ''
+      Frog is an integration of memory-based natural language processing (NLP) modules developed for Dutch. All NLP modules are based on Timbl, the Tilburg memory-based learning software package. Most modules were created in the 1990s at the ILK Research Group (Tilburg University, the Netherlands) and the CLiPS Research Centre (University of Antwerp, Belgium).
Over the years they have been integrated into a single text processing tool, which is currently maintained and developed by the Language Machines Research Group and the Centre for Language and Speech Technology at Radboud University Nijmegen. A dependency parser, a base phrase chunker, and a named-entity recognizer module were added more recently. Where possible, Frog makes use of multi-processor support to run subtasks in parallel.
+
+      Various (re)programming rounds have been made possible through funding by NWO, the Netherlands Organisation for Scientific Research, particularly under the CGN project, the IMIX programme, the Implicit Linguistics project, the CLARIN-NL programme and the CLARIAH programme.
+    '';
+  };
+
+}
diff --git a/pkgs/development/libraries/languagemachines/frogdata.nix b/pkgs/development/libraries/languagemachines/frogdata.nix
new file mode 100644
index 0000000000000..d9578c380e6d5
--- /dev/null
+++ b/pkgs/development/libraries/languagemachines/frogdata.nix
@@ -0,0 +1,34 @@
+{ stdenv, fetchurl
+, automake, autoconf, libtool, pkgconfig, autoconf-archive
+, libxml2, icu
+, languageMachines
+}:
+
+let
+  release = builtins.fromJSON (builtins.readFile ./release-info/LanguageMachines-frogdata.json);
+  # release.version carries a leading "v" (see the release-info JSON); strip it
+  # so the derivation name ends in a version that Nix tooling can parse.
+  version = stdenv.lib.removePrefix "v" release.version;
+in
+
+stdenv.mkDerivation {
+  name = "frogdata-${version}";
+  inherit version;
+  src = fetchurl { inherit (release) url sha256;
+                   name = "frogdata-${release.version}.tar.gz"; };
+  buildInputs = [ automake autoconf libtool pkgconfig autoconf-archive
+                ];
+
+  preConfigure = ''
+    sh bootstrap.sh
+  '';
+
+  meta = with stdenv.lib; {
+    description = "Data for Frog, a Tagger-Lemmatizer-Morphological-Analyzer-Dependency-Parser for Dutch";
+    homepage = https://languagemachines.github.io/frog;
+    license = licenses.gpl3;
+    platforms = platforms.all;
+    maintainers = with maintainers; [ roberth ];
+  };
+
+}
diff --git a/pkgs/development/libraries/languagemachines/libfolia.nix b/pkgs/development/libraries/languagemachines/libfolia.nix
new file mode 100644
index
0000000000000..9cddbdd22d940
--- /dev/null
+++ b/pkgs/development/libraries/languagemachines/libfolia.nix
@@ -0,0 +1,33 @@
+{ stdenv, fetchurl
+, automake, autoconf, libtool, pkgconfig, autoconf-archive
+, libxml2, icu
+, languageMachines }:
+
+let
+  release = builtins.fromJSON (builtins.readFile ./release-info/LanguageMachines-libfolia.json);
+  # release.version carries a leading "v" (see the release-info JSON); strip it
+  # so the derivation name ends in a version that Nix tooling can parse.
+  version = stdenv.lib.removePrefix "v" release.version;
+in
+
+stdenv.mkDerivation {
+  name = "libfolia-${version}";
+  inherit version;
+  src = fetchurl { inherit (release) url sha256;
+                   name = "libfolia-${release.version}.tar.gz"; };
+  buildInputs = [ automake autoconf libtool pkgconfig autoconf-archive libxml2 icu languageMachines.ticcutils ];
+  preConfigure = "sh bootstrap.sh";
+
+  meta = with stdenv.lib; {
+    description = "A C++ API for FoLiA documents; an XML-based linguistic annotation format.";
+    homepage = https://proycon.github.io/folia/;
+    license = licenses.gpl3;
+    platforms = platforms.all;
+    maintainers = with maintainers; [ roberth ];
+
+    longDescription = ''
+      A high-level C++ API to read, manipulate, and create FoLiA documents. FoLiA is an XML-based annotation format, suitable for the representation of linguistically annotated language resources. FoLiA’s intended use is as a format for storing and/or exchanging language resources, including corpora.
+    '';
+  };
+
+}
diff --git a/pkgs/development/libraries/languagemachines/mbt-add-libxml2-dep.patch b/pkgs/development/libraries/languagemachines/mbt-add-libxml2-dep.patch
new file mode 100644
index 0000000000000..9037f1093bb9d
--- /dev/null
+++ b/pkgs/development/libraries/languagemachines/mbt-add-libxml2-dep.patch
@@ -0,0 +1,13 @@
+--- a/configure.ac 2017-06-12 06:48:15.000000000 +0200
++++ b/configure.ac 2017-06-12 06:50:06.000000000 +0200
+@@ -76,6 +76,10 @@
+ CXXFLAGS="$CXXFLAGS $ticcutils_CFLAGS"
+ LIBS="$LIBS $ticcutils_LIBS"
+ 
++PKG_CHECK_MODULES([libxml2], [libxml-2.0 >= 2.6.16] )
++CXXFLAGS="$CXXFLAGS $libxml2_CFLAGS"
++LIBS="$LIBS $libxml2_LIBS"
++
+ AC_CONFIG_FILES([
+ Makefile
+ mbt.pc
diff --git a/pkgs/development/libraries/languagemachines/mbt.nix b/pkgs/development/libraries/languagemachines/mbt.nix
new file mode 100644
index 0000000000000..0ba7e686e4346
--- /dev/null
+++ b/pkgs/development/libraries/languagemachines/mbt.nix
@@ -0,0 +1,43 @@
+{ stdenv, fetchurl
+, automake, autoconf, libtool, pkgconfig, autoconf-archive
+, libxml2
+, languageMachines
+}:
+
+let
+  release = builtins.fromJSON (builtins.readFile ./release-info/LanguageMachines-mbt.json);
+  # release.version carries a leading "v" (see the release-info JSON); strip it
+  # so the derivation name ends in a version that Nix tooling can parse.
+  version = stdenv.lib.removePrefix "v" release.version;
+in
+
+stdenv.mkDerivation {
+  name = "mbt-${version}";
+  inherit version;
+  src = fetchurl { inherit (release) url sha256;
+                   name = "mbt-${release.version}.tar.gz"; };
+  buildInputs = [ automake autoconf libtool pkgconfig autoconf-archive
+                  libxml2
+                  languageMachines.ticcutils
+                  languageMachines.timbl
+                ];
+  patches = [ ./mbt-add-libxml2-dep.patch ];
+  preConfigure = ''
+    sh bootstrap.sh
+  '';
+
+  meta = with stdenv.lib; {
+    description = "Memory Based Tagger";
+    homepage = https://languagemachines.github.io/mbt/;
+    license = licenses.gpl3;
+    platforms = platforms.all;
+    maintainers = with maintainers; [ roberth ];
+
+    longDescription = ''
+      MBT is a memory-based tagger-generator and tagger in one.
The tagger-generator part can generate a sequence tagger on the basis of a training set of tagged sequences; the tagger part can tag new sequences. MBT can, for instance, be used to generate part-of-speech taggers or chunkers for natural language processing. It has also been used for named-entity recognition, information extraction in domain-specific texts, and disfluency chunking in transcribed speech.
+
+      Mbt is used by Frog for Dutch tagging.
+    '';
+  };
+
+}
diff --git a/pkgs/development/libraries/languagemachines/packages.nix b/pkgs/development/libraries/languagemachines/packages.nix
new file mode 100644
index 0000000000000..c2d449ed13c69
--- /dev/null
+++ b/pkgs/development/libraries/languagemachines/packages.nix
@@ -0,0 +1,14 @@
+{ callPackage }: # package set imported by all-packages.nix as `languageMachines`
+{
+  ticcutils = callPackage ./ticcutils.nix { };
+  libfolia = callPackage ./libfolia.nix { };
+  ucto = callPackage ./ucto.nix { };
+  uctodata = callPackage ./uctodata.nix { };
+  timbl = callPackage ./timbl.nix { };
+  timblserver = callPackage ./timblserver.nix { };
+  mbt = callPackage ./mbt.nix { };
+  frog = callPackage ./frog.nix { };
+  frogdata = callPackage ./frogdata.nix { };
+
+  test = callPackage ./test.nix { }; # runCommand-based check that runs the frog binary
+}
diff --git a/pkgs/development/libraries/languagemachines/release-info/LanguageMachines-frog.json b/pkgs/development/libraries/languagemachines/release-info/LanguageMachines-frog.json
new file mode 100644
index 0000000000000..55c2ec20a312b
--- /dev/null
+++ b/pkgs/development/libraries/languagemachines/release-info/LanguageMachines-frog.json
@@ -0,0 +1,5 @@
+{
+  "version": "v0.13.7",
+  "url": "https://api.github.com/repos/LanguageMachines/frog/tarball/v0.13.7",
+  "sha256": "0swyfi3g862n888qj8v8kd18745hasy0vnc70i9qlv0ji0321bnf"
+}
diff --git a/pkgs/development/libraries/languagemachines/release-info/LanguageMachines-frogdata.json b/pkgs/development/libraries/languagemachines/release-info/LanguageMachines-frogdata.json
new file mode 100644
index 0000000000000..1147322be6da8
--- /dev/null
+++
b/pkgs/development/libraries/languagemachines/release-info/LanguageMachines-frogdata.json @@ -0,0 +1,5 @@ +{ + "version": "v0.13", + "url": "https://api.github.com/repos/LanguageMachines/frogdata/tarball/v0.13", + "sha256": "13mhv8qacl0n20ddl1ay49xi6h2m0a149ya3rrsmaah3x4adb4sg" +} diff --git a/pkgs/development/libraries/languagemachines/release-info/LanguageMachines-libfolia.json b/pkgs/development/libraries/languagemachines/release-info/LanguageMachines-libfolia.json new file mode 100644 index 0000000000000..792d958213fbb --- /dev/null +++ b/pkgs/development/libraries/languagemachines/release-info/LanguageMachines-libfolia.json @@ -0,0 +1,5 @@ +{ + "version": "v1.7", + "url": "https://api.github.com/repos/LanguageMachines/libfolia/tarball/v1.7", + "sha256": "0hpxdry7n2887klryc587xv46p6z6jp6hz9x7k2pk5v7jb0z4s65" +} diff --git a/pkgs/development/libraries/languagemachines/release-info/LanguageMachines-mbt.json b/pkgs/development/libraries/languagemachines/release-info/LanguageMachines-mbt.json new file mode 100644 index 0000000000000..f1bbff47a28ed --- /dev/null +++ b/pkgs/development/libraries/languagemachines/release-info/LanguageMachines-mbt.json @@ -0,0 +1,5 @@ +{ + "version": "v3.2.16", + "url": "https://api.github.com/repos/LanguageMachines/mbt/tarball/v3.2.16", + "sha256": "0f9f5l84m0lmmv4km9myn3yhy67jbmk3qn2fi40dy025gx4l0x3x" +} diff --git a/pkgs/development/libraries/languagemachines/release-info/LanguageMachines-ticcutils.json b/pkgs/development/libraries/languagemachines/release-info/LanguageMachines-ticcutils.json new file mode 100644 index 0000000000000..11069c6b02c74 --- /dev/null +++ b/pkgs/development/libraries/languagemachines/release-info/LanguageMachines-ticcutils.json @@ -0,0 +1,5 @@ +{ + "version": "v0.15", + "url": "https://api.github.com/repos/LanguageMachines/ticcutils/tarball/v0.15", + "sha256": "0lssb1klx2flmr6fy78j37i5lbq3gfhzjx24j6n72ndm2rvprvcn" +} diff --git 
a/pkgs/development/libraries/languagemachines/release-info/LanguageMachines-timbl.json b/pkgs/development/libraries/languagemachines/release-info/LanguageMachines-timbl.json new file mode 100644 index 0000000000000..d35f2c8333aab --- /dev/null +++ b/pkgs/development/libraries/languagemachines/release-info/LanguageMachines-timbl.json @@ -0,0 +1,5 @@ +{ + "version": "v6.4.9", + "url": "https://api.github.com/repos/LanguageMachines/timbl/tarball/v6.4.9", + "sha256": "1279npc3xlq05hnkylpbkgg941gjhvl6sd5fw4vgwcx2rwmmlaay" +} diff --git a/pkgs/development/libraries/languagemachines/release-info/LanguageMachines-timblserver.json b/pkgs/development/libraries/languagemachines/release-info/LanguageMachines-timblserver.json new file mode 100644 index 0000000000000..d588da3f8b6e9 --- /dev/null +++ b/pkgs/development/libraries/languagemachines/release-info/LanguageMachines-timblserver.json @@ -0,0 +1,5 @@ +{ + "version": "v1.11", + "url": "https://api.github.com/repos/LanguageMachines/timblserver/tarball/v1.11", + "sha256": "02k8c704wr5miy82w6zj0imm7sdfnxf3db34qiaa8l3myhn17qlw" +} diff --git a/pkgs/development/libraries/languagemachines/release-info/LanguageMachines-ucto.json b/pkgs/development/libraries/languagemachines/release-info/LanguageMachines-ucto.json new file mode 100644 index 0000000000000..9b05cf3e1393e --- /dev/null +++ b/pkgs/development/libraries/languagemachines/release-info/LanguageMachines-ucto.json @@ -0,0 +1,5 @@ +{ + "version": "v0.9.6", + "url": "https://api.github.com/repos/LanguageMachines/ucto/tarball/v0.9.6", + "sha256": "0fxq4j32g7kp6789xz23651c4v2j7zlz87cshfv9g1xjs7jxns3f" +} diff --git a/pkgs/development/libraries/languagemachines/release-info/LanguageMachines-uctodata.json b/pkgs/development/libraries/languagemachines/release-info/LanguageMachines-uctodata.json new file mode 100644 index 0000000000000..08069bb333bc9 --- /dev/null +++ b/pkgs/development/libraries/languagemachines/release-info/LanguageMachines-uctodata.json @@ -0,0 +1,5 @@ +{ + 
"version": "v0.4", + "url": "https://api.github.com/repos/LanguageMachines/uctodata/tarball/v0.4", + "sha256": "02c78qmwi9ijpk5wila3p62fmfdy1rpmlvvzbxs3wg0rdb0nwvd2" +} diff --git a/pkgs/development/libraries/languagemachines/test.nix b/pkgs/development/libraries/languagemachines/test.nix new file mode 100644 index 0000000000000..48c41ac52f222 --- /dev/null +++ b/pkgs/development/libraries/languagemachines/test.nix @@ -0,0 +1,25 @@ +{ runCommand +, languageMachines +}: + +runCommand "frog-test" {} '' + ${languageMachines.frog}/bin/frog >$out </dev/null || expected "Stemming works" + grep "een" $out | grep "onbep" >/dev/null || expected "Tagging works" + + deps="$(echo $(awk 'BEGIN { FS = "\t*" } ; {print $1 " -> " $9 "; "}' <$out))" + test "1 -> 2; 2 -> 0; 3 -> 4; 4 -> 2; -> ;" = "$deps" || expected "Dependency parsing works" +'' diff --git a/pkgs/development/libraries/languagemachines/ticcutils.nix b/pkgs/development/libraries/languagemachines/ticcutils.nix new file mode 100644 index 0000000000000..f1cb62e68020b --- /dev/null +++ b/pkgs/development/libraries/languagemachines/ticcutils.nix @@ -0,0 +1,29 @@ +{ stdenv, fetchurl +, automake, autoconf, libtool, pkgconfig, autoconf-archive +, libxml2, zlib, bzip2, libtar }: + +let + release = builtins.fromJSON (builtins.readFile ./release-info/LanguageMachines-ticcutils.json); +in + +stdenv.mkDerivation { + name = "ticcutils"; + version = release.version; + src = fetchurl { inherit (release) url sha256; + name = "ticcutils-${release.version}.tar.gz"; }; + buildInputs = [ automake autoconf libtool pkgconfig autoconf-archive libxml2 + # optional: + zlib bzip2 libtar + # broken but optional: boost + ]; + preConfigure = "sh bootstrap.sh"; + + meta = with stdenv.lib; { + description = "This module contains useful functions for general use in the TiCC software stack and beyond."; + homepage = https://github.com/LanguageMachines/ticcutils; + license = licenses.gpl3; + platforms = platforms.all; + maintainers = with 
maintainers; [ roberth ];
+  };
+
+}
diff --git a/pkgs/development/libraries/languagemachines/timbl.nix b/pkgs/development/libraries/languagemachines/timbl.nix
new file mode 100644
index 0000000000000..6a60996dc604a
--- /dev/null
+++ b/pkgs/development/libraries/languagemachines/timbl.nix
@@ -0,0 +1,39 @@
+{ stdenv, fetchurl
+, automake, autoconf, libtool, pkgconfig, autoconf-archive
+, libxml2
+, languageMachines
+}:
+
+let
+  release = builtins.fromJSON (builtins.readFile ./release-info/LanguageMachines-timbl.json);
+  # release.version carries a leading "v" (see the release-info JSON); strip it
+  # so the derivation name ends in a version that Nix tooling can parse.
+  version = stdenv.lib.removePrefix "v" release.version;
+in
+
+stdenv.mkDerivation {
+  name = "timbl-${version}";
+  inherit version;
+  src = fetchurl { inherit (release) url sha256;
+                   name = "timbl-${release.version}.tar.gz"; };
+  buildInputs = [ automake autoconf libtool pkgconfig autoconf-archive
+                  libxml2
+                  languageMachines.ticcutils
+                ];
+  preConfigure = "sh bootstrap.sh";
+
+  meta = with stdenv.lib; {
+    description = "TiMBL implements several memory-based learning algorithms";
+    homepage = https://github.com/LanguageMachines/timbl/;
+    license = licenses.gpl3;
+    platforms = platforms.all;
+    maintainers = with maintainers; [ roberth ];
+
+    longDescription = ''
+      TiMBL is an open source software package implementing several memory-based learning algorithms, among which IB1-IG, an implementation of k-nearest neighbor classification with feature weighting suitable for symbolic feature spaces, and IGTree, a decision-tree approximation of IB1-IG. All implemented algorithms have in common that they store some representation of the training set explicitly in memory. During testing, new cases are classified by extrapolation from the most similar stored cases.
+
+      For over fifteen years TiMBL has been mostly used in natural language processing as a machine learning classifier component, but its use extends to virtually any supervised machine learning domain.
Due to its particular decision-tree-based implementation, TiMBL is in many cases far more efficient in classification than a standard k-nearest neighbor algorithm would be.
+    '';
+  };
+
+}
diff --git a/pkgs/development/libraries/languagemachines/timblserver.nix b/pkgs/development/libraries/languagemachines/timblserver.nix
new file mode 100644
index 0000000000000..d8659c9a86b35
--- /dev/null
+++ b/pkgs/development/libraries/languagemachines/timblserver.nix
@@ -0,0 +1,40 @@
+{ stdenv, fetchurl
+, automake, autoconf, libtool, pkgconfig, autoconf-archive
+, libxml2
+, languageMachines
+}:
+
+let
+  release = builtins.fromJSON (builtins.readFile ./release-info/LanguageMachines-timblserver.json);
+  # release.version carries a leading "v" (see the release-info JSON); strip it
+  # so the derivation name ends in a version that Nix tooling can parse.
+  version = stdenv.lib.removePrefix "v" release.version;
+in
+
+stdenv.mkDerivation {
+  name = "timblserver-${version}";
+  inherit version;
+  src = fetchurl { inherit (release) url sha256;
+                   name = "timblserver-${release.version}.tar.gz"; };
+  buildInputs = [ automake autoconf libtool pkgconfig autoconf-archive
+                  libxml2
+                  languageMachines.ticcutils
+                  languageMachines.timbl
+                ];
+  preConfigure = "sh bootstrap.sh";
+
+  meta = with stdenv.lib; {
+    description = "This server for TiMBL implements several memory-based learning algorithms";
+    homepage = https://github.com/LanguageMachines/timblserver/;
+    license = licenses.gpl3;
+    platforms = platforms.all;
+    maintainers = with maintainers; [ roberth ];
+
+    longDescription = ''
+      This implements a server for TiMBL. TiMBL is an open source software package implementing several memory-based learning algorithms, among which IB1-IG, an implementation of k-nearest neighbor classification with feature weighting suitable for symbolic feature spaces, and IGTree, a decision-tree approximation of IB1-IG. All implemented algorithms have in common that they store some representation of the training set explicitly in memory. During testing, new cases are classified by extrapolation from the most similar stored cases.
+
+      For over fifteen years TiMBL has been mostly used in natural language processing as a machine learning classifier component, but its use extends to virtually any supervised machine learning domain. Due to its particular decision-tree-based implementation, TiMBL is in many cases far more efficient in classification than a standard k-nearest neighbor algorithm would be.
+    '';
+  };
+
+}
diff --git a/pkgs/development/libraries/languagemachines/ucto.nix b/pkgs/development/libraries/languagemachines/ucto.nix
new file mode 100644
index 0000000000000..d60bca412720b
--- /dev/null
+++ b/pkgs/development/libraries/languagemachines/ucto.nix
@@ -0,0 +1,51 @@
+{ stdenv, fetchurl
+, automake, autoconf, libtool, pkgconfig, autoconf-archive
+, libxml2, icu
+, languageMachines
+}:
+
+let
+  release = builtins.fromJSON (builtins.readFile ./release-info/LanguageMachines-ucto.json);
+  # release.version carries a leading "v" (see the release-info JSON); strip it
+  # so the derivation name ends in a version that Nix tooling can parse.
+  version = stdenv.lib.removePrefix "v" release.version;
+in
+
+stdenv.mkDerivation {
+  name = "ucto-${version}";
+  inherit version;
+  src = fetchurl { inherit (release) url sha256;
+                   name = "ucto-${release.version}.tar.gz"; };
+  buildInputs = [ automake autoconf libtool pkgconfig autoconf-archive
+                  icu libxml2
+                  languageMachines.ticcutils
+                  languageMachines.libfolia
+                  languageMachines.uctodata
+                  # TODO textcat from libreoffice? Pulls in X11 dependencies?
+                ];
+  preConfigure = "sh bootstrap.sh;";
+
+  postInstall = ''
+    # ucto expects the data files installed in the same prefix
+    mkdir -p $out/share/ucto/;
+    for f in ${languageMachines.uctodata}/share/ucto/*; do
+      echo "Linking $f"
+      ln -s $f $out/share/ucto/;
+    done;
+  '';
+
+  meta = with stdenv.lib; {
+    description = "A rule-based tokenizer for natural language";
+    homepage = https://languagemachines.github.io/ucto/;
+    license = licenses.gpl3;
+    platforms = platforms.all;
+    maintainers = with maintainers; [ roberth ];
+
+    longDescription = ''
+      Ucto tokenizes text files: it separates words from punctuation, and splits sentences.
It offers several other basic preprocessing steps such as changing case that you can all use to make your text suited for further processing such as indexing, part-of-speech tagging, or machine translation.
+
+      Ucto comes with tokenisation rules for several languages and can be easily extended to suit other languages. It has been incorporated for tokenizing Dutch text in Frog, a Dutch morpho-syntactic processor.
+    '';
+  };
+
+}
diff --git a/pkgs/development/libraries/languagemachines/uctodata.nix b/pkgs/development/libraries/languagemachines/uctodata.nix
new file mode 100644
index 0000000000000..33037dbb87fa9
--- /dev/null
+++ b/pkgs/development/libraries/languagemachines/uctodata.nix
@@ -0,0 +1,35 @@
+{ stdenv, fetchurl
+, automake, autoconf, libtool, pkgconfig, autoconf-archive
+, libxml2, icu
+, languageMachines }:
+
+let
+  release = builtins.fromJSON (builtins.readFile ./release-info/LanguageMachines-uctodata.json);
+  # release.version carries a leading "v" (see the release-info JSON); strip it
+  # so the derivation name ends in a version that Nix tooling can parse.
+  version = stdenv.lib.removePrefix "v" release.version;
+in
+
+stdenv.mkDerivation {
+  name = "uctodata-${version}";
+  inherit version;
+  src = fetchurl { inherit (release) url sha256;
+                   name = "uctodata-${release.version}.tar.gz"; };
+  buildInputs = [ automake autoconf libtool pkgconfig autoconf-archive ];
+  preConfigure = "sh bootstrap.sh";
+
+  meta = with stdenv.lib; {
+    description = "Data for ucto, a rule-based tokenizer for natural language";
+    homepage = https://languagemachines.github.io/ucto/;
+    license = licenses.gpl3;
+    platforms = platforms.all;
+    maintainers = with maintainers; [ roberth ];
+
+    longDescription = ''
+      Ucto tokenizes text files: it separates words from punctuation, and splits sentences. It offers several other basic preprocessing steps such as changing case that you can all use to make your text suited for further processing such as indexing, part-of-speech tagging, or machine translation.
+
+      Ucto comes with tokenisation rules for several languages and can be easily extended to suit other languages.
It has been incorporated for tokenizing Dutch text in Frog, a Dutch morpho-syntactic processor.
+    '';
+  };
+
+}
diff --git a/pkgs/development/libraries/languagemachines/update b/pkgs/development/libraries/languagemachines/update
new file mode 100755
index 0000000000000..3189637df7463
--- /dev/null
+++ b/pkgs/development/libraries/languagemachines/update
@@ -0,0 +1,93 @@
+#!/usr/bin/env nix-shell
+#!nix-shell --packages curl
+#!nix-shell --packages jq
+#!nix-shell --packages parallel
+#!nix-shell -i bash
+
+# Exit immediately if a command exits with a non-zero status.
+# Exit when a producer fails in a pipe
+# Treat undefined variable references as errors
+set -e -o pipefail -u
+
+# Check if working directory is (probably) right
+test "./update" = "$0" || {
+  echo "The working directory ought to be the same as the update script location. Please invoke as ./update" 1>&2
+  exit 1
+}
+
+# Create temporary directory with automatic cleanup.
+# Assigned before being marked readonly: `readonly VAR="$(cmd)"` would report
+# the status of `readonly` and hide a failing mktemp from `set -e`.
+MY_TMP="$(mktemp -d)"
+readonly MY_TMP
+cleanup () {
+  rm -rf "$MY_TMP"
+}
+trap cleanup EXIT
+
+# Fetch the latest release info of a GitHub repository.
+# $1: repository owner, $2: repository name
+# stdout: file containing release info and a convenient placeholder
+#         for the sha256 attribute
+getRelease () {
+  local owner="$1"
+  local repo="$2"
+  local out="$MY_TMP/$owner--$repo-release"
+  # Callers run this in a command substitution, where bash (outside POSIX mode)
+  # clears `set -e`, so a failed download must be reported explicitly.
+  curl -fSs https://api.github.com/repos/"$owner"/"$repo"/releases/latest \
+    | jq '{ version: .name, url: .tarball_url, sha256: "__SHA256__" }' \
+    > "$out" || return 1
+  echo "$out"
+}
+
+# 'getters' for the release info file
+
+# $1: release info file
+# stdout: unquoted tarball url
+releaseUrl () {
+  local file="$1"
+  jq -r '.url' <"$file"
+}
+
+# $1: release info file
+# stdout: unquoted version
+releaseVersion () {
+  local file="$1"
+  jq -r '.version' <"$file"
+}
+
+# Fetch release tarball and compute hash
+# $1: release info file, $2: name prefix for the downloaded tarball
+# stdout: base32 sha256 to be used in fetchurl
+getReleaseHash () {
+  local file="$1"
+  local name="$2"
+  nix-prefetch-url "$(releaseUrl "$file")" --name "$name-$(releaseVersion "$file").tar.gz"
+}
+
+# Write a release info file to release-info/$owner-$repo.json
+# $1: repository owner, $2: repository name
+updateRelease () {
+  local owner="$1"
+  local repo="$2"
+  # Declared separately from the assignments: `local x="$(cmd)"` returns the
+  # status of `local`, so a failed download or prefetch would go unnoticed.
+  local r hash
+  r="$(getRelease "$owner" "$repo")"
+  hash="$(getReleaseHash "$r" "$repo")"
+  sed \
+    -e s/__SHA256__/"$hash"/ \
+    <"$r" \
+    >"release-info/$owner-$repo.json"
+}
+
+updateRelease LanguageMachines frogdata
+updateRelease LanguageMachines frog
+updateRelease LanguageMachines libfolia
+updateRelease LanguageMachines mbt
+updateRelease LanguageMachines ticcutils
+updateRelease LanguageMachines timbl
+updateRelease LanguageMachines timblserver
+updateRelease LanguageMachines ucto
+updateRelease LanguageMachines uctodata
diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix
index 090a44009a1d0..919776ebe8fbb 100644
--- a/pkgs/top-level/all-packages.nix
+++ b/pkgs/top-level/all-packages.nix
@@ -7891,6 +7891,8 @@ with pkgs;
 
   freetts = callPackage ../development/libraries/freetts { };
 
+  frog = self.languageMachines.frog;
+
   fstrm = callPackage ../development/libraries/fstrm { };
 
   cfitsio = callPackage ../development/libraries/cfitsio { };
@@ -8493,6 +8495,8 @@ with pkgs;
   };
   libkrb5 = krb5Full.override { type = "lib"; };
 
+  languageMachines = recurseIntoAttrs (import ../development/libraries/languagemachines/packages.nix { inherit callPackage; });
+
   lasso = callPackage ../development/libraries/lasso { };
 
   LASzip = callPackage ../development/libraries/LASzip { };