-
-
Notifications
You must be signed in to change notification settings - Fork 13.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #28036 from roberth/frog
frog: init at v0.13.7
- Loading branch information
Showing
23 changed files
with
516 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
# Frog: a tagger, lemmatizer, morphological analyzer and dependency parser | ||
# for Dutch. Builds the release described by the JSON file in ./release-info. | ||
{ stdenv, fetchurl | ||
, automake, autoconf, libtool, pkgconfig, autoconf-archive | ||
, libxml2, icu | ||
, languageMachines | ||
}: | ||
|
||
let | ||
  # Version, tarball URL and hash are read from a JSON file next to this expression. | ||
  release = builtins.fromJSON (builtins.readFile ./release-info/LanguageMachines-frog.json); | ||
in | ||
|
||
stdenv.mkDerivation { | ||
  name = "frog"; | ||
  version = release.version; | ||
  src = fetchurl { inherit (release) url sha256; | ||
                   name = "frog-${release.version}.tar.gz"; }; | ||
|
||
  # Tools that only run during the build (autotools bootstrap, pkg-config). | ||
  nativeBuildInputs = [ automake autoconf libtool pkgconfig autoconf-archive ]; | ||
  # Libraries and data the package is built against. | ||
  buildInputs = [ libxml2 icu | ||
                  languageMachines.ticcutils | ||
                  languageMachines.timbl | ||
                  languageMachines.mbt | ||
                  languageMachines.libfolia | ||
                  languageMachines.ucto | ||
                  languageMachines.frogdata | ||
                ]; | ||
|
||
  # The tarball ships no generated configure script; bootstrap.sh creates it. | ||
  preConfigure = '' | ||
    sh bootstrap.sh | ||
  ''; | ||
  postInstall = '' | ||
    # frog expects the data files installed in the same prefix | ||
    mkdir -p $out/share/frog/; | ||
    for f in ${languageMachines.frogdata}/share/frog/*; do | ||
      ln -s $f $out/share/frog/; | ||
    done; | ||
  ''; | ||
|
||
  # Run the test suite after installation, once the data files have been | ||
  # linked into $out, as a proper install check rather than from postInstall. | ||
  doInstallCheck = true; | ||
  installCheckTarget = "check"; | ||
|
||
  meta = with stdenv.lib; { | ||
    description = "A Tagger-Lemmatizer-Morphological-Analyzer-Dependency-Parser for Dutch"; | ||
    homepage = "https://languagemachines.github.io/frog"; | ||
    license = licenses.gpl3; | ||
    platforms = platforms.all; | ||
    maintainers = with maintainers; [ roberth ]; | ||
|
||
    longDescription = '' | ||
      Frog is an integration of memory-based natural language processing (NLP) modules developed for Dutch. All NLP modules are based on Timbl, the Tilburg memory-based learning software package. Most modules were created in the 1990s at the ILK Research Group (Tilburg University, the Netherlands) and the CLiPS Research Centre (University of Antwerp, Belgium). Over the years they have been integrated into a single text processing tool, which is currently maintained and developed by the Language Machines Research Group and the Centre for Language and Speech Technology at Radboud University Nijmegen. A dependency parser, a base phrase chunker, and a named-entity recognizer module were added more recently. Where possible, Frog makes use of multi-processor support to run subtasks in parallel. | ||
      Various (re)programming rounds have been made possible through funding by NWO, the Netherlands Organisation for Scientific Research, particularly under the CGN project, the IMIX programme, the Implicit Linguistics project, the CLARIN-NL programme and the CLARIAH programme. | ||
    ''; | ||
  }; | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# Data files for Frog, built from the release described in ./release-info. | ||
{ stdenv, fetchurl | ||
, automake, autoconf, libtool, pkgconfig, autoconf-archive | ||
, libxml2, icu | ||
, languageMachines | ||
}: | ||
|
||
let | ||
  # Version, tarball URL and hash are read from a JSON file next to this expression. | ||
  release = builtins.fromJSON (builtins.readFile ./release-info/LanguageMachines-frogdata.json); | ||
in | ||
|
||
stdenv.mkDerivation { | ||
  name = "frogdata"; | ||
  version = release.version; | ||
  src = fetchurl { inherit (release) url sha256; | ||
                   name = "frogdata-${release.version}.tar.gz"; }; | ||
  # Only build-time tools are needed here, so they are native build inputs. | ||
  nativeBuildInputs = [ automake autoconf libtool pkgconfig autoconf-archive | ||
                      ]; | ||
|
||
  # The tarball ships no generated configure script; bootstrap.sh creates it. | ||
  preConfigure = '' | ||
    sh bootstrap.sh | ||
  ''; | ||
|
||
  meta = with stdenv.lib; { | ||
    description = "Data for Frog, a Tagger-Lemmatizer-Morphological-Analyzer-Dependency-Parser for Dutch"; | ||
    homepage = "https://languagemachines.github.io/frog"; | ||
    license = licenses.gpl3; | ||
    platforms = platforms.all; | ||
    maintainers = with maintainers; [ roberth ]; | ||
  }; | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# libfolia: C++ library for FoLiA documents, built from the release | ||
# described in ./release-info. | ||
{ stdenv, fetchurl | ||
, automake, autoconf, libtool, pkgconfig, autoconf-archive | ||
, libxml2, icu | ||
, languageMachines }: | ||
|
||
let | ||
  # Version, tarball URL and hash are read from a JSON file next to this expression. | ||
  release = builtins.fromJSON (builtins.readFile ./release-info/LanguageMachines-libfolia.json); | ||
in | ||
|
||
stdenv.mkDerivation { | ||
  name = "libfolia"; | ||
  version = release.version; | ||
  src = fetchurl { inherit (release) url sha256; | ||
                   name = "libfolia-${release.version}.tar.gz"; }; | ||
  # Tools that only run during the build (autotools bootstrap, pkg-config). | ||
  nativeBuildInputs = [ automake autoconf libtool pkgconfig autoconf-archive ]; | ||
  # Libraries the package is built against. | ||
  buildInputs = [ libxml2 icu languageMachines.ticcutils ]; | ||
  # The tarball ships no generated configure script; bootstrap.sh creates it. | ||
  preConfigure = "sh bootstrap.sh"; | ||
|
||
  meta = with stdenv.lib; { | ||
    # No trailing period, following the nixpkgs convention for descriptions. | ||
    description = "A C++ API for FoLiA documents; an XML-based linguistic annotation format"; | ||
    homepage = "https://proycon.github.io/folia/"; | ||
    license = licenses.gpl3; | ||
    platforms = platforms.all; | ||
    maintainers = with maintainers; [ roberth ]; | ||
|
||
    longDescription = '' | ||
      A high-level C++ API to read, manipulate, and create FoLiA documents. FoLiA is an XML-based annotation format, suitable for the representation of linguistically annotated language resources. FoLiA’s intended use is as a format for storing and/or exchanging language resources, including corpora. | ||
    ''; | ||
  }; | ||
|
||
} |
13 changes: 13 additions & 0 deletions
13
pkgs/development/libraries/languagemachines/mbt-add-libxml2-dep.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
--- a/configure.ac 2017-06-12 06:48:15.000000000 +0200 | ||
+++ b/configure.ac 2017-06-12 06:50:06.000000000 +0200 | ||
@@ -76,6 +76,10 @@ | ||
CXXFLAGS="$CXXFLAGS $ticcutils_CFLAGS" | ||
LIBS="$LIBS $ticcutils_LIBS" | ||
|
||
+PKG_CHECK_MODULES([libxml2], [libxml-2.0 >= 2.6.16] ) | ||
+CXXFLAGS="$CXXFLAGS $libxml2_CFLAGS" | ||
+LIBS="$LIBS $libxml2_LIBS" | ||
+ | ||
AC_CONFIG_FILES([ | ||
Makefile | ||
mbt.pc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
# MBT, the memory-based tagger, built from the release described in | ||
# ./release-info with a local patch applied to configure.ac. | ||
{ stdenv, fetchurl | ||
, automake, autoconf, libtool, pkgconfig, autoconf-archive | ||
, libxml2 | ||
, languageMachines | ||
}: | ||
|
||
let | ||
  # Version, tarball URL and hash are read from a JSON file next to this expression. | ||
  release = builtins.fromJSON (builtins.readFile ./release-info/LanguageMachines-mbt.json); | ||
in | ||
|
||
stdenv.mkDerivation { | ||
  name = "mbt"; | ||
  version = release.version; | ||
  src = fetchurl { inherit (release) url sha256; | ||
                   name = "mbt-${release.version}.tar.gz"; }; | ||
  # Tools that only run during the build (autotools bootstrap, pkg-config). | ||
  nativeBuildInputs = [ automake autoconf libtool pkgconfig autoconf-archive ]; | ||
  # Libraries the package is built against. | ||
  buildInputs = [ libxml2 | ||
                  languageMachines.ticcutils | ||
                  languageMachines.timbl | ||
                ]; | ||
  patches = [ ./mbt-add-libxml2-dep.patch ]; | ||
  # The tarball ships no generated configure script; bootstrap.sh creates it. | ||
  preConfigure = '' | ||
    sh bootstrap.sh | ||
  ''; | ||
|
||
  meta = with stdenv.lib; { | ||
    description = "Memory Based Tagger"; | ||
    homepage = "https://languagemachines.github.io/mbt/"; | ||
    license = licenses.gpl3; | ||
    platforms = platforms.all; | ||
    maintainers = with maintainers; [ roberth ]; | ||
|
||
    longDescription = '' | ||
      MBT is a memory-based tagger-generator and tagger in one. The tagger-generator part can generate a sequence tagger on the basis of a training set of tagged sequences; the tagger part can tag new sequences. MBT can, for instance, be used to generate part-of-speech taggers or chunkers for natural language processing. It has also been used for named-entity recognition, information extraction in domain-specific texts, and disfluency chunking in transcribed speech. | ||
      Mbt is used by Frog for Dutch tagging. | ||
    ''; | ||
  }; | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# Package set for the LanguageMachines software: one attribute per | ||
# expression in this directory, each instantiated through callPackage. | ||
{ callPackage }: | ||
{ | ||
  # Shared utility library | ||
  ticcutils = callPackage ./ticcutils.nix { }; | ||
|
||
  # Memory-based learner, its server, and the tagger | ||
  timbl = callPackage ./timbl.nix { }; | ||
  timblserver = callPackage ./timblserver.nix { }; | ||
  mbt = callPackage ./mbt.nix { }; | ||
|
||
  # FoLiA library and the ucto packages | ||
  libfolia = callPackage ./libfolia.nix { }; | ||
  ucto = callPackage ./ucto.nix { }; | ||
  uctodata = callPackage ./uctodata.nix { }; | ||
|
||
  # Frog and its data files | ||
  frogdata = callPackage ./frogdata.nix { }; | ||
  frog = callPackage ./frog.nix { }; | ||
|
||
  # Test derivation defined in ./test.nix | ||
  test = callPackage ./test.nix { }; | ||
} |
5 changes: 5 additions & 0 deletions
5
pkgs/development/libraries/languagemachines/release-info/LanguageMachines-frog.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
{ | ||
"version": "v0.13.7", | ||
"url": "https://api.github.com/repos/LanguageMachines/frog/tarball/v0.13.7", | ||
"sha256": "0swyfi3g862n888qj8v8kd18745hasy0vnc70i9qlv0ji0321bnf" | ||
} |
5 changes: 5 additions & 0 deletions
5
pkgs/development/libraries/languagemachines/release-info/LanguageMachines-frogdata.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
{ | ||
"version": "v0.13", | ||
"url": "https://api.github.com/repos/LanguageMachines/frogdata/tarball/v0.13", | ||
"sha256": "13mhv8qacl0n20ddl1ay49xi6h2m0a149ya3rrsmaah3x4adb4sg" | ||
} |
5 changes: 5 additions & 0 deletions
5
pkgs/development/libraries/languagemachines/release-info/LanguageMachines-libfolia.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
{ | ||
"version": "v1.7", | ||
"url": "https://api.github.com/repos/LanguageMachines/libfolia/tarball/v1.7", | ||
"sha256": "0hpxdry7n2887klryc587xv46p6z6jp6hz9x7k2pk5v7jb0z4s65" | ||
} |
5 changes: 5 additions & 0 deletions
5
pkgs/development/libraries/languagemachines/release-info/LanguageMachines-mbt.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
{ | ||
"version": "v3.2.16", | ||
"url": "https://api.github.com/repos/LanguageMachines/mbt/tarball/v3.2.16", | ||
"sha256": "0f9f5l84m0lmmv4km9myn3yhy67jbmk3qn2fi40dy025gx4l0x3x" | ||
} |
5 changes: 5 additions & 0 deletions
5
pkgs/development/libraries/languagemachines/release-info/LanguageMachines-ticcutils.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
{ | ||
"version": "v0.15", | ||
"url": "https://api.github.com/repos/LanguageMachines/ticcutils/tarball/v0.15", | ||
"sha256": "0lssb1klx2flmr6fy78j37i5lbq3gfhzjx24j6n72ndm2rvprvcn" | ||
} |
5 changes: 5 additions & 0 deletions
5
pkgs/development/libraries/languagemachines/release-info/LanguageMachines-timbl.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
{ | ||
"version": "v6.4.9", | ||
"url": "https://api.github.com/repos/LanguageMachines/timbl/tarball/v6.4.9", | ||
"sha256": "1279npc3xlq05hnkylpbkgg941gjhvl6sd5fw4vgwcx2rwmmlaay" | ||
} |
5 changes: 5 additions & 0 deletions
5
pkgs/development/libraries/languagemachines/release-info/LanguageMachines-timblserver.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
{ | ||
"version": "v1.11", | ||
"url": "https://api.github.com/repos/LanguageMachines/timblserver/tarball/v1.11", | ||
"sha256": "02k8c704wr5miy82w6zj0imm7sdfnxf3db34qiaa8l3myhn17qlw" | ||
} |
5 changes: 5 additions & 0 deletions
5
pkgs/development/libraries/languagemachines/release-info/LanguageMachines-ucto.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
{ | ||
"version": "v0.9.6", | ||
"url": "https://api.github.com/repos/LanguageMachines/ucto/tarball/v0.9.6", | ||
"sha256": "0fxq4j32g7kp6789xz23651c4v2j7zlz87cshfv9g1xjs7jxns3f" | ||
} |
5 changes: 5 additions & 0 deletions
5
pkgs/development/libraries/languagemachines/release-info/LanguageMachines-uctodata.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
{ | ||
"version": "v0.4", | ||
"url": "https://api.github.com/repos/LanguageMachines/uctodata/tarball/v0.4", | ||
"sha256": "02c78qmwi9ijpk5wila3p62fmfdy1rpmlvvzbxs3wg0rdb0nwvd2" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# Smoke test for frog: feeds the sentence "Dit is een test" to the frog | ||
# binary on stdin and stores what it prints in $out. The script then checks | ||
# that the output has five lines, that the line containing "is" also contains | ||
# "zijn", that the line containing "een" also contains "onbep", and that | ||
# columns 1 and 9 of every line, joined as "a -> b;", equal the expected | ||
# string. Any failed check prints a message and exits with status 1. | ||
{ runCommand | ||
, languageMachines | ||
}: | ||
|
||
# The derivation is named "frog-test"; its output file is frog's raw output. | ||
runCommand "frog-test" {} '' | ||
${languageMachines.frog}/bin/frog >$out <<EOF | ||
Dit is een test | ||
EOF | ||
echo "Frog output:" | ||
cat $out | ||
expected () { | ||
echo "Test expectation failed: $@" | ||
exit 1 | ||
} | ||
lines="$(wc -l $out | awk '{print $1}')" | ||
test 5 = $lines || expected "Five lines of output" | ||
grep "is" $out | grep "zijn" >/dev/null || expected "Stemming works" | ||
grep "een" $out | grep "onbep" >/dev/null || expected "Tagging works" | ||
deps="$(echo $(awk 'BEGIN { FS = "\t*" } ; {print $1 " -> " $9 "; "}' <$out))" | ||
test "1 -> 2; 2 -> 0; 3 -> 4; 4 -> 2; -> ;" = "$deps" || expected "Dependency parsing works" | ||
'' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# ticcutils: utility library for the TiCC software stack, built from the | ||
# release described in ./release-info. | ||
{ stdenv, fetchurl | ||
, automake, autoconf, libtool, pkgconfig, autoconf-archive | ||
, libxml2, zlib, bzip2, libtar }: | ||
|
||
let | ||
  # Version, tarball URL and hash are read from a JSON file next to this expression. | ||
  release = builtins.fromJSON (builtins.readFile ./release-info/LanguageMachines-ticcutils.json); | ||
in | ||
|
||
stdenv.mkDerivation { | ||
  name = "ticcutils"; | ||
  version = release.version; | ||
  src = fetchurl { inherit (release) url sha256; | ||
                   name = "ticcutils-${release.version}.tar.gz"; }; | ||
  # Tools that only run during the build (autotools bootstrap, pkg-config). | ||
  nativeBuildInputs = [ automake autoconf libtool pkgconfig autoconf-archive ]; | ||
  # Libraries the package is built against. | ||
  buildInputs = [ libxml2 | ||
                  # optional: | ||
                  zlib bzip2 libtar | ||
                  # broken but optional: boost | ||
                ]; | ||
  # The tarball ships no generated configure script; bootstrap.sh creates it. | ||
  preConfigure = "sh bootstrap.sh"; | ||
|
||
  meta = with stdenv.lib; { | ||
    # No trailing period, following the nixpkgs convention for descriptions. | ||
    description = "This module contains useful functions for general use in the TiCC software stack and beyond"; | ||
    homepage = "https://github.com/LanguageMachines/ticcutils"; | ||
    license = licenses.gpl3; | ||
    platforms = platforms.all; | ||
    maintainers = with maintainers; [ roberth ]; | ||
  }; | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
# TiMBL, the Tilburg memory-based learner, built from the release described | ||
# in ./release-info. | ||
{ stdenv, fetchurl | ||
, automake, autoconf, libtool, pkgconfig, autoconf-archive | ||
, libxml2 | ||
, languageMachines | ||
}: | ||
|
||
let | ||
  # Version, tarball URL and hash are read from a JSON file next to this expression. | ||
  release = builtins.fromJSON (builtins.readFile ./release-info/LanguageMachines-timbl.json); | ||
in | ||
|
||
stdenv.mkDerivation { | ||
  name = "timbl"; | ||
  version = release.version; | ||
  src = fetchurl { inherit (release) url sha256; | ||
                   name = "timbl-${release.version}.tar.gz"; }; | ||
  # Tools that only run during the build (autotools bootstrap, pkg-config). | ||
  nativeBuildInputs = [ automake autoconf libtool pkgconfig autoconf-archive ]; | ||
  # Libraries the package is built against. | ||
  buildInputs = [ libxml2 | ||
                  languageMachines.ticcutils | ||
                ]; | ||
  # The tarball ships no generated configure script; bootstrap.sh creates it. | ||
  preConfigure = "sh bootstrap.sh"; | ||
|
||
  meta = with stdenv.lib; { | ||
    description = "TiMBL implements several memory-based learning algorithms"; | ||
    homepage = "https://github.com/LanguageMachines/timbl/"; | ||
    license = licenses.gpl3; | ||
    platforms = platforms.all; | ||
    maintainers = with maintainers; [ roberth ]; | ||
|
||
    longDescription = '' | ||
      TiMBL is an open source software package implementing several memory-based learning algorithms, among which IB1-IG, an implementation of k-nearest neighbor classification with feature weighting suitable for symbolic feature spaces, and IGTree, a decision-tree approximation of IB1-IG. All implemented algorithms have in common that they store some representation of the training set explicitly in memory. During testing, new cases are classified by extrapolation from the most similar stored cases. | ||
      For over fifteen years TiMBL has been mostly used in natural language processing as a machine learning classifier component, but its use extends to virtually any supervised machine learning domain. Due to its particular decision-tree-based implementation, TiMBL is in many cases far more efficient in classification than a standard k-nearest neighbor algorithm would be. | ||
    ''; | ||
  }; | ||
|
||
} |
37 changes: 37 additions & 0 deletions
37
pkgs/development/libraries/languagemachines/timblserver.nix
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
# Server for TiMBL, built from the release described in ./release-info. | ||
{ stdenv, fetchurl | ||
, automake, autoconf, libtool, pkgconfig, autoconf-archive | ||
, libxml2 | ||
, languageMachines | ||
}: | ||
|
||
let | ||
  # Version, tarball URL and hash are read from a JSON file next to this expression. | ||
  release = builtins.fromJSON (builtins.readFile ./release-info/LanguageMachines-timblserver.json); | ||
in | ||
|
||
stdenv.mkDerivation { | ||
  name = "timblserver"; | ||
  version = release.version; | ||
  src = fetchurl { inherit (release) url sha256; | ||
                   name = "timblserver-${release.version}.tar.gz"; }; | ||
  # Tools that only run during the build (autotools bootstrap, pkg-config). | ||
  nativeBuildInputs = [ automake autoconf libtool pkgconfig autoconf-archive ]; | ||
  # Libraries the package is built against. | ||
  buildInputs = [ libxml2 | ||
                  languageMachines.ticcutils | ||
                  languageMachines.timbl | ||
                ]; | ||
  # The tarball ships no generated configure script; bootstrap.sh creates it. | ||
  preConfigure = "sh bootstrap.sh"; | ||
|
||
  meta = with stdenv.lib; { | ||
    description = "This server for TiMBL implements several memory-based learning algorithms"; | ||
    homepage = "https://github.com/LanguageMachines/timblserver/"; | ||
    license = licenses.gpl3; | ||
    platforms = platforms.all; | ||
    maintainers = with maintainers; [ roberth ]; | ||
|
||
    longDescription = '' | ||
      This implements a server for TiMBL. TiMBL is an open source software package implementing several memory-based learning algorithms, among which IB1-IG, an implementation of k-nearest neighbor classification with feature weighting suitable for symbolic feature spaces, and IGTree, a decision-tree approximation of IB1-IG. All implemented algorithms have in common that they store some representation of the training set explicitly in memory. During testing, new cases are classified by extrapolation from the most similar stored cases. | ||
      For over fifteen years TiMBL has been mostly used in natural language processing as a machine learning classifier component, but its use extends to virtually any supervised machine learning domain. Due to its particular decision-tree-based implementation, TiMBL is in many cases far more efficient in classification than a standard k-nearest neighbor algorithm would be. | ||
    ''; | ||
  }; | ||
|
||
} |
Oops, something went wrong.