diff --git a/.gitignore b/.gitignore
index b2355070..b3d95092 100644
--- a/.gitignore
+++ b/.gitignore
@@ -61,19 +61,19 @@
/src/cg3/functions.cg3
/src/cg3/generated-tag-list.cg3
/src/cg3/valency-postspell.cg3
-/src/filters/*-tags.txt
-/src/filters/*area-*.regex
-/src/filters/remove-all*.regex
-/src/filters/remove-homonymy-tags.regex
-/src/filters/remove-usage-tags.regex
-/src/fst/*-error-log.txt
-/src/fst/*.tmp.*
+/src/fst/filters/*-tags.txt
+/src/fst/filters/*area-*.regex
+/src/fst/filters/remove-all*.regex
+/src/fst/filters/remove-homonymy-tags.regex
+/src/fst/filters/remove-usage-tags.regex
+/src/fst/morphology/*-error-log.txt
+/src/fst/morphology/*.tmp.*
/src/fst/generated_files/*.lexc
-/src/fst/lexicon*
-/src/fst/url.lexc
-/src/orthography/*-nfc2nfd.*
-/src/orthography/*-nfd2nfc.*
-/src/phonetics/tests/*.sh
+/src/fst/morphology/lexicon*
+/src/fst/morphology/url.lexc
+/src/fst/orthography/*-nfc2nfd.*
+/src/fst/orthography/*-nfd2nfc.*
+/src/fst/phonetics/tests/*.sh
/test/run-morph-tester.sh
/test/run-yaml-testcases.sh
/test/src/morphology/all*.txt
@@ -148,3 +148,5 @@ Makefile.in
build
bygg
generated*
+.deps
+.generated
diff --git a/m4/giella-config-files.m4 b/m4/giella-config-files.m4
index c9670e2b..8745b665 100644
--- a/m4/giella-config-files.m4
+++ b/m4/giella-config-files.m4
@@ -8,15 +8,16 @@ AC_CONFIG_FILES([Makefile \
giella-est.pc \
manifest.toml \
src/Makefile \
- src/filters/Makefile \
- src/hyphenation/Makefile \
+ src/fst/filters/Makefile \
+ src/fst/syllabification/Makefile \
src/fst/Makefile \
- src/orthography/Makefile \
- src/phonetics/Makefile \
- src/phonetics/tests/Makefile \
+ src/fst/morphology/Makefile \
+ src/fst/orthography/Makefile \
+ src/fst/phonetics/Makefile \
+ src/fst/phonetics/tests/Makefile \
src/cg3/Makefile \
- src/tagsets/Makefile \
- src/transcriptions/Makefile \
+ src/fst/tagsets/Makefile \
+ src/fst/transcriptions/Makefile \
docs/Makefile \
test/Makefile \
test/tools/Makefile \
@@ -64,8 +65,8 @@ AC_CONFIG_FILES([Makefile \
# Add one AC_CONFIG_FILES for each script file that needs processing. This gives
# the most pleasant user experience and most readable autoconf code to maintain.
# Spell checker tests, all languages:
-AC_CONFIG_FILES([src/phonetics/tests/run_tests.sh],
- [chmod a+x src/phonetics/tests/run_tests.sh])
+AC_CONFIG_FILES([src/fst/phonetics/tests/run_tests.sh],
+ [chmod a+x src/fst/phonetics/tests/run_tests.sh])
AC_CONFIG_FILES([test/tools/spellcheckers/test-zhfst-file.sh], \
[chmod a+x test/tools/spellcheckers/test-zhfst-file.sh])
AC_CONFIG_FILES([test/tools/spellcheckers/fstbased/desktop/hfst/test-zhfst-basic-sugg-speed.sh], \
diff --git a/m4/giella-macros.m4 b/m4/giella-macros.m4
index 2d7500c5..99092492 100644
--- a/m4/giella-macros.m4
+++ b/m4/giella-macros.m4
@@ -88,7 +88,7 @@ AC_MSG_RESULT([$GIELLA_CORE])
###############################################################
### This is the version of the Giella Core that we require. ###
### UPDATE AS NEEDED.
-_giella_core_min_version=0.20.1
+_giella_core_min_version=0.21.0
# GIELLA_CORE/GTCORE env. variable, required by the infrastructure to find scripts:
AC_ARG_VAR([GIELLA_CORE], [directory for the Giella infra core scripts and other required resources])
@@ -845,9 +845,9 @@ AC_ARG_ENABLE([abbr],
[enable_abbr=$enableval],
[enable_abbr=no])
AS_IF([test x$enable_abbr != xno -a \
- "$(find ${srcdir}/src/fst/stems/ -name "abbreviations.lexc" | head -n 1)" = "" ],
+ "$(find ${srcdir}/src/fst/morphology/stems/ -name "abbreviations.lexc" | head -n 1)" = "" ],
[AC_MSG_ERROR([You asked for abbr.txt generation, but have no file \
-src/fst/stems/abbreviations.lexc])])
+src/fst/morphology/stems/abbreviations.lexc])])
AS_IF([test x$enable_abbr = xyes -a x$enable_generators = xno],
[AC_MSG_ERROR([You need to enable generators to build the abbr file])])
AM_CONDITIONAL([WANT_ABBR], [test "x$enable_abbr" != xno])
@@ -1002,7 +1002,7 @@ To build, test and install:
make install
EOF
AS_IF([test x$gt_prog_xslt = xno -a \
- "$(find ${srcdir}/src/fst/stems -name "*.xml" | head -n 1)" != "" ],
+ "$(find ${srcdir}/src/fst/morphology/stems -name "*.xml" | head -n 1)" != "" ],
[AC_MSG_WARN([You have XML source files, but XML transformation to LexC is
disabled. Please check the output of configure to locate any problems. The LexC
files will still compile though.
@@ -1048,5 +1048,7 @@ cd ..
git clone git@github.com:giellalt/$gt_SHARED_FAILS
cd $gt_SHARED_FAILS
./autogen.sh && ./configure && make])])
+AC_MSG_WARN([January 2024: the lexc files and fsts have been moved up to src/fst/morphology])
]) # gt_PRINT_FOOTER
+
# vim: set ft=config:
diff --git a/src/filters/Makefile.am.orig b/src/filters/Makefile.am.orig
deleted file mode 100644
index 2eb6ed77..00000000
--- a/src/filters/Makefile.am.orig
+++ /dev/null
@@ -1,69 +0,0 @@
-## Process this file with automake to produce Makefile.in
-
-## Copyright (C) 2011 Samediggi
-
-## This program is free software: you can redistribute it and/or modify
-## it under the terms of the GNU General Public License as published by
-## the Free Software Foundation, either version 3 of the License, or
-## (at your option) any later version.
-
-## This program is distributed in the hope that it will be useful,
-## but WITHOUT ANY WARRANTY; without even the implied warranty of
-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-## GNU General Public License for more details.
-
-## You should have received a copy of the GNU General Public License
-## along with this program. If not, see .
-
-#########################################################
-############## BEGIN: Local modifications ###############
-
-# List any local filter regex files here:
-GIELLA_FILTER_LOCAL_REGEX_SRCS=\
- downcase_UCletters.regex
-
-# List any local filter xfscript files here:
-GIELLA_FILTER_LOCAL_XFSCRIPT_SRCS=reorder-tags.est.xfscript \
- remove-sg-forms.est.xfscript \
- remove-pl-forms.est.xfscript \
- remove-sg-nom-forms.est.xfscript \
- remove-non-gi-forms.est.xfscript \
- remove-usage-tags.est.xfscript \
- remove-nospell-words.est.xfscript \
- remove-NotNorm-wordforms.est.xfscript \
- modify-derivations.est.xfscript \
- block-derivations.est.xfscript \
- block-compounds.est.xfscript \
- wordpair-filter.est.xfscript \
- numeral-filter.est.xfscript \
- evaluate-flags.est.xfscript \
- downcase-derived_proper-strings.est.xfscript \
- upcase-guessed-names.est.xfscript \
- remove-guessed-forms.est.xfscript
-
-# List any local filter lexc files here:
-GIELLA_FILTER_LOCAL_LEXC_SRCS=
-
-# List any locally generated regex source files here:
-GIELLA_FILTER_LOCAL_GENERATED_REGEX_SRCS=
-
-# List any locally generated xfscript source files here:
-GIELLA_FILTER_LOCAL_GENERATED_XFSCRIPT_SRCS=
-
-# List any locally generated lexc source files here:
-GIELLA_FILTER_LOCAL_GENERATED_LEXC_SRCS=
-
-# List any additional source files here, so that they are included in the dist.
-# Source files that are not directly compiled to fst's but are instead used as
-# part of a local build step should be listed here.
-EXTRA_SRCS=
-
-########## Add local build rules below here: ############
-
-############### END: Local modifications ################
-#########################################################
-
-# Included build file, where the actual build instructions are:
-include $(top_srcdir)/../giella-core/am-shared/src-filters-dir-include.am
-
-# vim: set ft=automake:
diff --git a/src/fst/Makefile.am b/src/fst/Makefile.am
index f0f39a5b..d599b796 100644
--- a/src/fst/Makefile.am
+++ b/src/fst/Makefile.am
@@ -1,197 +1,1091 @@
## Process this file with automake to produce Makefile.in
+## Copyright: Sámediggi/Divvun/UiT
+## Licence: GPL v3+
-## Copyright (C) 2011 Samediggi
-
-## This program is free software: you can redistribute it and/or modify
-## it under the terms of the GNU General Public License as published by
-## the Free Software Foundation, either version 3 of the License, or
-## (at your option) any later version.
-
-## This program is distributed in the hope that it will be useful,
-## but WITHOUT ANY WARRANTY; without even the implied warranty of
-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-## GNU General Public License for more details.
-
-## You should have received a copy of the GNU General Public License
-## along with this program. If not, see .
-
-# Add language-specific flags for hfst-lexc compilation here:
-if HAVE_SHARED_COMMON
-HFST_LEXC_LOCAL_FLAGS= # --Werror # uncomment if lexc is good enough
-else
-HFST_LEXC_LOCAL_FLAGS= # No --Werror if deps are missing !
-endif
-
-
-####### Morphology source file defs: ########
-
-# Set this to name of lexc file containing Multichar_Symbols and LEXICON Root
-GT_LEXC_ROOT=$(srcdir)/root.lexc
-
-# Set this to the names of all regular lexc source files:
-GT_LEXC_SRCS_L1_L2=\
- stems/abbreviations.lexc \
- stems/adjectives.lexc \
- stems/noninflecting_adjectives.lexc \
- stems/comparative_adjectives.lexc \
- stems/superlative_adjectives.lexc \
- stems/adpositions.lexc \
- stems/adverbs.lexc \
- stems/conjunctions.lexc \
- stems/genitive_attributes.lexc \
- stems/interjections.lexc \
- stems/nouns.lexc \
- stems/cardinalnumerals.lexc \
- stems/ordinalnumerals.lexc \
- stems/pronouns.lexc \
- stems/propernouns.lexc \
- stems/verbs.lexc \
- stems/noninflecting_verbs.lexc \
- stems/prefixes.lexc \
- stems/final_components.lexc \
- stems/numbers.lexc \
- stems/acronyms.lexc \
- stems/symbol_strings.lexc \
- affixes/regular_declinations.lexc \
- affixes/exceptional_declinations.lexc \
- affixes/verbs.lexc \
- affixes/gi.lexc
-
-
-# If you are building an error-detecting L2 analyser, specify the lexc files
-# that differ between the regular L1 and the L2 analysers below, in L1 and
-# L2 respectively. L2 files must end in "*-L2.lexc". See SME for an example.
-L1=
-
-L2=
-
-GT_LEXC_SRCS=\
- $(GT_LEXC_SRCS_L1_L2) \
- $(L1)
-
-GT_LEXC_L2_SRCS=\
- $(GT_LEXC_SRCS_L1_L2) \
- $(L2)
-
-# Set this to the names of all generated lexc files, if any
-GENERATED_LEXC_SRCS=generated_files/mul-$(GLANG)-punctuation.lexc \
- generated_files/mul-$(GLANG)-symbols.lexc
-
-# change handling of shared lexical data here:
-if HAVE_SHARED_COMMON
-url.tmp.lexc: $(gt_SHARED_common)/src/fst/url.lexc
- $(AM_V_CP)cp -f $< $@
-
-generated_files/mul-$(GLANG)-%.lexc: $(gt_SHARED_common)/src/fst/stems/%.lexc
- $(AM_V_at)$(MKDIR_P) generated_files
- $(AM_V_CP)cp -f $< $@
-else
-# this is "safe" fallback (compiles but you miss everything)
-url.tmp.lexc:
- echo "LEXICON Root" > $@
- echo "< h t t p (s) %: %/ %/ ?*> # ;" >> $@
-
-generated_files/mul-$(GLANG)-%.lexc:
- $(AM_V_at)$(MKDIR_P) generated_files
- echo "! Missing shared common data" > $@
-endif
-# add other lexical shared data handling here
-
-# Set this to the names of all source xml files, if any
-GT_XML_SRCS=
-
-# Define any additional lexc sources here (compiled on their own):
-GT_LOCAL_SRCS=\
- pair_initial.tmp.lexc \
- pair_final.tmp.lexc \
- num_initial.tmp.lexc \
- num_final.tmp.lexc \
- abbrevdot.tmp.lexc \
- guesser-simplex-nouns.tmp.lexc \
- guesser-names.tmp0.lexc
-
-# guesser-derivations.tmp.lexc
-# punctuation.tmp.lexc
-# acronyms.tmp.lexc
-
-# Define local xfscripts here:
-GT_LOCAL_XFSCRIPT_SRCS=\
- emoticon.xfscript
-
-# Define here any additional sources just included in the distro:
-GT_DISTRO_SRCS=
-
-### BEGIN: Local processing: ###
-EST_AFFIX_FILES=$(srcdir)/affixes/regular_declinations \
- $(srcdir)/affixes/exceptional_declinations \
- $(srcdir)/affixes/verbs \
- $(srcdir)/affixes/gi
-
-# make the parts inflect
-# by re-using root.lexc and affixes, and omitting all the stem lexicons
-# ... and remove the flag diacritics from initial parts (why? because otherwise the filter that
-# puts the initial and final part together doesn't work ?)
+# always build . last here, and tagsets have to be built after morphology
+SUBDIRS = morphology filters phonetics syllabification orthography transcriptions tagsets .
+
+####### Automake targets: ########
+
+# Define target variables first, before assigning to them:
+GT_ANALYSERS=
+GT_GENERATORS=
+CUSTOM_FSTS=
+
+#### Local modifications in *fst processing: ####
+####
+#### Copy the fallback targets, and rename them to the desired targets. Then:
+#### Replace the 'cp' command (Xerox) / Prepend the hfst-invert command (Hfst -
+#### remember to move the $<) with whatever you need to complete
+#### the processing to get the final target transducer.
+#### Remember to add the dependencies as well.
+#### Also make sure that HFST and Xerox processing are the same.
+####
+#### If you add new transducers to be built, you need to add them to the
+#### relevant variable, e.g.:
+####
+#### if CAN_HFST
+#### GT_GENERATORS+=generator-oahpa-gt-norm.hfst
+#### endif
+####
+#### NB!!!! The HFST targets should get a hyphen after 'analyser'/'generator'
+#### respectively, to make the local targets minimally different from and
+#### slightly more specific than the fallback targets. This is to avoid warnings
+#### about duplicate targets. That is, the local targets should look like:
+####
+#### analyser-%.hfst: analyser-%.tmp.hfst
+#### generator-%.hfst: generator-%.tmp.hfst
+
+##################################################################
+#### BEGIN: Add local processing instructions BELOW this line ####
+##################################################################
+
+########################################################
+#### Add language-specific transducer targets here: ####
+
+#### Xerox transducers:
+if CAN_XFST
+GT_ANALYSERS+=analyser-gt-desc.xfst \
+ analyser-gt-norm.xfst \
+ analyser-disamb-gt-desc.xfst
+GT_GENERATORS+=generator-gt-desc.xfst \
+ generator-gt-norm.xfst
+
+if WANT_CUSTOM_FSTS
+CUSTOM_FSTS+=
+endif # WANT_CUSTOM_FSTS
+
+endif # CAN_XFST
+
+#### HFST transducers
+if CAN_HFST
+## NB: list each transducer exactly once per variable.
+GT_ANALYSERS+=analyser-gt-desc.hfst \
+ analyser-gt-norm.hfst \
+ analyser-gt-descguess.hfst \
+ analyser-gt-guess.hfst \
+ analyser-disamb-gt-desc.hfst
+## Generators mirror the analysers above, except for the disamb variant.
+GT_GENERATORS+=generator-gt-desc.hfst \
+ generator-gt-norm.hfst \
+ generator-gt-descguess.hfst \
+ generator-gt-guess.hfst
+
+if WANT_CUSTOM_FSTS
+CUSTOM_FSTS+=
+endif # WANT_CUSTOM_FSTS
+
+endif # CAN_HFST
+
+#### FOMA transducers
+if CAN_FOMA
+GT_ANALYSERS+=
+GT_GENERATORS+=
+
+if WANT_CUSTOM_FSTS
+CUSTOM_FSTS+=
+endif # WANT_CUSTOM_FSTS
+
+endif # CAN_FOMA
+
+#################################################
+#### Add language-specific build rules here: ####
+
+EST_EXTRA_PRE_FILTERS=filters/remove-sg-forms.est filters/remove-pl-forms.est filters/remove-sg-nom-forms.est filters/remove-non-gi-forms.est filters/block-derivations.est # filters/remove-usage-tags.est
+#EST_GUESSER_PRE_FILTERS=filters/block-guesser-derivations.est
+EST_EXTRA_POST_FILTERS=filters/modify-derivations.est # filters/downcase-derived_proper-strings.est
+EST_WORDPAIR_FILTERS=filters/reorder-tags.est filters/wordpair-filter.est
+EST_NUMERAL_FILTERS=filters/reorder-tags.est filters/numeral-filter.est
+#EST_COMPOUND_PRE_FILTERS=filters/block-compounds.est
+#EST_COMPOUND_POST_FILTERS=filters/compound-filter.est
+
+# paired words are words whose both parts inflect, e.g. emb-kumb, kihin-kahin
+# they are compiled into a transducer of its own:
+# initial_part final_part
+# this transducer is union-ed with the transducer of the rest of vocabulary (?)
+
+# paired words: initial part
+# HFST:
+pair_initial.tmp1.hfst: fst/pair_initial.tmp.hfst \
+ fst/phonology.compose.hfst
+ $(AM_V_INTRSCT)\
+ $(HFST_DETERMINIZE) $(MORE_VERBOSITY) $(HFST_FLAGS) $<\
+ | $(HFST_MINIMIZE) $(MORE_VERBOSITY) $(HFST_FLAGS) \
+ | $(HFST_COMPOSE_INTERSECT) $(COMPOSE_INTERSECT_FLAG) \
+ $(MORE_VERBOSITY) $(HFST_FLAGS) \
+ -2 fst/phonology.compose.hfst \
+ | $(HFST_MINIMIZE) $(MORE_VERBOSITY) $(HFST_FLAGS) \
+ -o $@
+
+# XEROX (the rule file read below must match the fst/phonology.compose.xfst prerequisite):
+pair_initial.tmp1.xfst: fst/pair_initial.tmp.xfst \
+ fst/phonology.compose.xfst
+ $(AM_V_LEXC)$(PRINTF) \
+ "read-source fst/pair_initial.tmp.xfst\n\
+ read-rules fst/phonology.compose.xfst\n\
+ compose-result\n\
+ save-result $@\n\
+ quit\n" \
+ | $(LEXC) $(VERBOSITY)
+
+# compound numerals where both parts inflect, e.g. viis#sada, viie#saja
+# they are compiled into a transducer of its own
+# this transducer is union-ed with the transducer of the simplex words
+
+# compound numerals: initial part
+# HFST:
+num_initial.tmp1.hfst: fst/num_initial.tmp.hfst \
+ fst/phonology.compose.hfst
+ $(AM_V_INTRSCT)\
+ $(HFST_DETERMINIZE) $(MORE_VERBOSITY) $(HFST_FLAGS) $<\
+ | $(HFST_MINIMIZE) $(MORE_VERBOSITY) $(HFST_FLAGS) \
+ | $(HFST_COMPOSE_INTERSECT) $(COMPOSE_INTERSECT_FLAG) \
+ $(MORE_VERBOSITY) $(HFST_FLAGS) \
+ -2 fst/phonology.compose.hfst \
+ | $(HFST_MINIMIZE) $(MORE_VERBOSITY) $(HFST_FLAGS) \
+ -o $@
+
+# XEROX
+num_initial.tmp1.xfst: fst/num_initial.tmp.xfst \
+ fst/phonology.compose.xfst
+ $(AM_V_LEXC)$(PRINTF) \
+ "read-source fst/num_initial.tmp.xfst\n\
+ read-rules fst/phonology.compose.xfst\n\
+ compose-result\nsave-result $@\n\
+ quit\n" \
+ | $(LEXC) $(VERBOSITY)
+
+
+# paired words: final part
+# HFST:
+pair_final.tmp1.hfst: fst/pair_final.tmp.hfst \
+ fst/phonology.compose.hfst
+ $(AM_V_INTRSCT)\
+ $(HFST_DETERMINIZE) $(MORE_VERBOSITY) $(HFST_FLAGS) $<\
+ | $(HFST_MINIMIZE) $(MORE_VERBOSITY) $(HFST_FLAGS) \
+ | $(HFST_COMPOSE_INTERSECT) $(COMPOSE_INTERSECT_FLAG) \
+ $(MORE_VERBOSITY) $(HFST_FLAGS) \
+ -2 fst/phonology.compose.hfst \
+ | $(HFST_MINIMIZE) $(MORE_VERBOSITY) $(HFST_FLAGS) \
+ -o $@
+
+# XEROX
+pair_final.tmp1.xfst: fst/pair_final.tmp.xfst \
+ fst/phonology.compose.xfst
+ $(AM_V_LEXC)$(PRINTF) \
+ "read-source fst/pair_final.tmp.xfst\n\
+ read-rules fst/phonology.compose.xfst\n\
+ compose-result\n\
+ save-result $@\n\
+ quit\n" \
+ | $(LEXC) $(VERBOSITY)
+
+# compound numerals: final part
+# HFST:
+num_final.tmp1.hfst: fst/num_final.tmp.hfst \
+ fst/phonology.compose.hfst
+ $(AM_V_INTRSCT)\
+ $(HFST_DETERMINIZE) $(MORE_VERBOSITY) $(HFST_FLAGS) $<\
+ | $(HFST_MINIMIZE) $(MORE_VERBOSITY) $(HFST_FLAGS) \
+ | $(HFST_COMPOSE_INTERSECT) $(COMPOSE_INTERSECT_FLAG) \
+ $(MORE_VERBOSITY) $(HFST_FLAGS) \
+ -2 fst/phonology.compose.hfst \
+ | $(HFST_MINIMIZE) $(MORE_VERBOSITY) $(HFST_FLAGS) \
+ -o $@
+
+# XEROX
+num_final.tmp1.xfst: fst/num_final.tmp.xfst \
+ fst/phonology.compose.xfst
+ $(AM_V_LEXC)$(PRINTF) \
+ "read-source fst/num_final.tmp.xfst\n\
+ read-rules fst/phonology.compose.xfst\n\
+ compose-result\n\
+ save-result $@\n\
+ quit\n" \
+ | $(LEXC) $(VERBOSITY)
+
+# guesser
+
+# guesser for simplex words
+# create draft "phonological" name patterns:
+# select only nouns, tag them as proper nouns,
+# and upcase both the lexical and the surface side
+# (perhaps this could be done more elegantly, i.e. in some other dir and/or makefile)
+fst/guesser-names.tmp.hfst: fst/guesser-names.tmp0.hfst filters/upcase-guessed-names.est.hfst
+ $(AM_V_XFST_TOOL)$(PRINTF) "set flag-is-epsilon OFF\n\
+ read regex \
+ [ \"+Guess\" \"+N\" \"+Prop\" <- \"+Guess\" \"+N\" ] \
+ .o. \$$[\"+Guess\" \"+N\"] \
+ .o. @\"filters/upcase-guessed-names.est.hfst\".i \
+ .o. @\"$<\" \
+ .o. @\"filters/upcase-guessed-names.est.hfst\" \
+ ;\n\
+ save stack $@\n\
+ quit\n" | $(XFST_TOOL)
+
+
+# phonological simplex word patterns with inflections
+guesser-simplex-nouns.tmp1.hfst: fst/guesser-simplex-nouns.tmp.hfst \
+ fst/phonology.compose.hfst
+ $(AM_V_INTRSCT)\
+ $(HFST_DETERMINIZE) $(MORE_VERBOSITY) $(HFST_FLAGS) $<\
+ | $(HFST_MINIMIZE) $(MORE_VERBOSITY) $(HFST_FLAGS) \
+ | $(HFST_COMPOSE_INTERSECT) $(COMPOSE_INTERSECT_FLAG) \
+ $(MORE_VERBOSITY) $(HFST_FLAGS) \
+ -2 fst/phonology.compose.hfst \
+ | $(HFST_MINIMIZE) $(MORE_VERBOSITY) $(HFST_FLAGS) \
+ -o $@
+
+# phonological name patterns with inflections
+guesser-names.tmp1.hfst: fst/guesser-names.tmp.hfst \
+ fst/phonology.compose.hfst
+ $(AM_V_INTRSCT)\
+ $(HFST_DETERMINIZE) $(MORE_VERBOSITY) $(HFST_FLAGS) $<\
+ | $(HFST_MINIMIZE) $(MORE_VERBOSITY) $(HFST_FLAGS) \
+ | $(HFST_COMPOSE_INTERSECT) $(COMPOSE_INTERSECT_FLAG) \
+ $(MORE_VERBOSITY) $(HFST_FLAGS) \
+ -2 fst/phonology.compose.hfst \
+ | $(HFST_MINIMIZE) $(MORE_VERBOSITY) $(HFST_FLAGS) \
+ -o $@
+
+
+# XEROX
+# not implemented...
+
+# acronyms
+# acronyms.tmp1.%: fst/acronyms.tmp.%
+# cp $< $@
+
+# FOMA
+# not implemented...
+
+# HFST: generator
+# Xerox & FOMA: analyser
+# (with a language-specific tag reordering script applied)
+pair_initial.tmp.%: pair_initial.tmp1.% \
+ filters/reorder-tags.$(GTLANG).% \
+ filters/reorder-semantic-tags.% \
+ filters/reorder-subpos-tags.% \
+ filters/remove-mwe-tags.%
+ $(AM_V_XFST_TOOL)$(PRINTF) "set flag-is-epsilon ON\n\
+ read regex \
+ @\"filters/reorder-tags.$(GTLANG).$*\"\
+ .o. @\"filters/reorder-subpos-tags.$*\" \
+ .o. @\"filters/reorder-semantic-tags.$*\" \
+ .o. @\"filters/remove-mwe-tags.$*\" \
+ .o. @\"$<\" \
+ ;\n\
+ save stack $@\n\
+ quit\n" | $(XFST_TOOL)
+
+num_initial.tmp.%: num_initial.tmp1.% \
+ filters/reorder-tags.$(GTLANG).% \
+ filters/reorder-semantic-tags.% \
+ filters/reorder-subpos-tags.% \
+ filters/remove-mwe-tags.%
+ $(AM_V_XFST_TOOL)$(PRINTF) "set flag-is-epsilon ON\n\
+ read regex \
+ @\"filters/reorder-tags.$(GTLANG).$*\"\
+ .o. @\"filters/reorder-subpos-tags.$*\" \
+ .o. @\"filters/reorder-semantic-tags.$*\" \
+ .o. @\"filters/remove-mwe-tags.$*\" \
+ .o. @\"$<\" \
+ ;\n\
+ save stack $@\n\
+ quit\n" | $(XFST_TOOL)
+
+
+# HFST: generator
+# Xerox & FOMA: analyser
+# (with a language-specific tag reordering script applied)
+pair_final.tmp.%: pair_final.tmp1.% \
+ filters/reorder-tags.$(GTLANG).% \
+ filters/reorder-semantic-tags.% \
+ filters/reorder-subpos-tags.% \
+ filters/remove-mwe-tags.%
+ $(AM_V_XFST_TOOL)$(PRINTF) "set flag-is-epsilon ON\n\
+ read regex \
+ @\"filters/reorder-tags.$(GTLANG).$*\"\
+ .o. @\"filters/reorder-subpos-tags.$*\" \
+ .o. @\"filters/reorder-semantic-tags.$*\" \
+ .o. @\"filters/remove-mwe-tags.$*\" \
+ .o. @\"$<\" \
+ ;\n\
+ save stack $@\n\
+ quit\n" | $(XFST_TOOL)
+
+num_final.tmp.%: num_final.tmp1.% \
+ filters/reorder-tags.$(GTLANG).% \
+ filters/reorder-semantic-tags.% \
+ filters/reorder-subpos-tags.% \
+ filters/remove-mwe-tags.%
+ $(AM_V_XFST_TOOL)$(PRINTF) "set flag-is-epsilon ON\n\
+ read regex \
+ @\"filters/reorder-tags.$(GTLANG).$*\"\
+ .o. @\"filters/reorder-subpos-tags.$*\" \
+ .o. @\"filters/reorder-semantic-tags.$*\" \
+ .o. @\"filters/remove-mwe-tags.$*\" \
+ .o. @\"$<\" \
+ ;\n\
+ save stack $@\n\
+ quit\n" | $(XFST_TOOL)
+
+# phonological simplex word and name patterns with inflections
+# (with lexical-side tags ordered correctly)
+guesser-simplex.tmp.%: guesser-simplex-nouns.tmp1.% \
+ guesser-names.tmp1.% \
+ filters/reorder-tags.$(GTLANG).% \
+ filters/reorder-semantic-tags.% \
+ filters/reorder-subpos-tags.% \
+ filters/remove-mwe-tags.%
+ $(AM_V_XFST_TOOL)$(PRINTF) "set flag-is-epsilon ON\n\
+ read regex \
+ @\"filters/reorder-tags.$(GTLANG).$*\"\
+ .o. @\"filters/reorder-subpos-tags.$*\" \
+ .o. @\"filters/reorder-semantic-tags.$*\" \
+ .o. @\"filters/remove-mwe-tags.$*\" \
+ .o. [@\"$<\" | @\"guesser-names.tmp1.$*\" ]\
+ ;\n\
+ save stack $@\n\
+ quit\n" | $(XFST_TOOL)
+
+
+
+# HFST: generator
+# Xerox & FOMA: analyser
+#
+# concatenate initial and final part of paired words and numerals
+
+redundant_wordpairs.%: pair_final.tmp.% pair_initial.tmp.%
+ $(AM_V_XFST_TOOL)$(PRINTF) "set flag-is-epsilon ON\n\
+ read regex [ [~[?* \"+Foc/gi\" ?*] \
+ .o. @\"pair_initial.tmp.$*\"] (\"-\") [ 0:\"#\" ] @\"pair_final.tmp.$*\"] \
+ .o. ~[?* » ?*] ; \nsave stack $@\nquit\n" | $(XFST_TOOL)
+
+redundant_numerals.%: num_final.tmp.% num_initial.tmp.%
+ $(AM_V_XFST_TOOL)$(PRINTF) "set flag-is-epsilon ON\n\
+ read regex [ [~[?* \"+Foc/gi\" ?*] \
+ .o. @\"num_initial.tmp.$*\"] @\"num_final.tmp.$*\"] \
+ .o. ~[?* » ?*] ; \nsave stack $@\nquit\n" | $(XFST_TOOL)
+
+# HFST:
+# filter out ungrammatical wordforms of paired words and numerals
+# the result is a transducer that can be unioned with simple words lexicon to arrive at the set of simplex words and derivations
+
+generator-wordpairs-raw.simple.hfst: redundant_wordpairs.hfst \
+ $(EST_WORDPAIR_FILTERS:%=%.hfst)
+ $(AM_V_XFST)$(PRINTF) "set flag-is-epsilon ON\n\
+ read regex \
+ $(EST_WORDPAIR_FILTERS:%=@\"%.hfst\" .o.) \
+ @\"$<\" \
+ ;\n\
+ save stack $@\n\
+ quit\n" | $(XFST_TOOL)
+
+generator-numerals-raw.simple.hfst: redundant_numerals.hfst \
+ $(EST_NUMERAL_FILTERS:%=%.hfst)
+ $(AM_V_XFST)$(PRINTF) "set flag-is-epsilon ON\n\
+ read regex \
+ $(EST_NUMERAL_FILTERS:%=@\"%.hfst\" .o.) \
+ @\"$<\" \
+ ;\n\
+ save stack $@\n\
+ quit\n" | $(XFST_TOOL)
+
+# guesser:
+# phonological patterns of simplex words and derived words
+guesser-raw.simple.hfst: guesser-simplex.tmp.hfst \
+ $(EST_EXTRA_PRE_FILTERS:%=%.hfst) \
+ $(EST_EXTRA_POST_FILTERS:%=%.hfst) \
+ filters/downcase-derived_proper-strings.est.hfst
+ $(AM_V_XFST)$(PRINTF) "set flag-is-epsilon ON\n\
+ read regex \
+ @\"filters/block-derivations.est.hfst\" \
+ .o. [ @\"$<\"] \
+ $(EST_EXTRA_POST_FILTERS:%=.o. @\"%.hfst\") \
+ ;\n\
+ define fst \n\
+ set flag-is-epsilon OFF\n\
+ read regex fst \
+ .o. @\"filters/downcase-derived_proper-strings.est.hfst\" \
+ ;\n\
+ save stack $@\n\
+ quit\n" | $(XFST_TOOL)
+
+
+# XEROX:
+analyser-wordpairs-raw.simple.xfst: redundant_wordpairs.xfst \
+ $(EST_WORDPAIR_FILTERS:%=%.xfst)
+ $(AM_V_XFST)$(PRINTF) "set flag-is-epsilon ON\n\
+ read regex \
+ $(EST_WORDPAIR_FILTERS:%=@\"%.xfst\" .o.) \
+ @\"$<\" \
+ ;\n\
+ save stack $@\n\
+ quit\n" | $(XFST) $(VERBOSITY)
+
+analyser-numerals-raw.simple.xfst: redundant_numerals.xfst \
+ $(EST_NUMERAL_FILTERS:%=%.xfst)
+ $(AM_V_XFST)$(PRINTF) "set flag-is-epsilon ON\n\
+ read regex \
+ $(EST_NUMERAL_FILTERS:%=@\"%.xfst\" .o.) \
+ @\"$<\" \
+ ;\n\
+ save stack $@\n\
+ quit\n" | $(XFST) $(VERBOSITY)
+
+# We need to add processing of language-specific tags in the analyser:
+# XEROX:
+# NB! cleanup net
+analyser-raw-gt-desc.simple.xfst: analyser-raw-gt-desc.tmp.xfst \
+ analyser-numerals-raw.simple.xfst \
+ $(EST_EXTRA_PRE_FILTERS:%=%.xfst) \
+ $(EST_EXTRA_POST_FILTERS:%=%.xfst) \
+ filters/downcase-derived_proper-strings.est.xfst
+ $(AM_V_XFST)$(PRINTF) "set flag-is-epsilon ON\n\
+ read regex \
+ $(EST_EXTRA_PRE_FILTERS:%=@\"%.xfst\" .o.) \
+ [ @\"$<\" | @\"analyser-numerals-raw.simple.xfst\" ] \
+ $(EST_EXTRA_POST_FILTERS:%=.o. @\"%.xfst\") \
+ ;\n\
+ cleanup net\n\
+ define fst \n\
+ set flag-is-epsilon OFF\n\
+ read regex fst \
+ .o. @\"filters/downcase-derived_proper-strings.est.xfst\" \
+ ;\n\
+ save stack $@\n\
+ quit\n" | $(XFST) $(VERBOSITY)
+
+# HFST:
+# 1) make a union of simple words, paired words and compound numerals
+# 2) create derivations from proper names
+# 3) filter out the incorrect derivations (derived from names, verbs, nouns etc)
+# result: lexicon-based simplex words and derivations
+# NB! includes potential compound word initial components tagged as +Guess, e.g. blabla;
+# they will be legit parts of compound words, once the compound word transducer is created
+
+generator-raw-gt-desc.simple.weightless.hfst: generator-raw-gt-desc.tmp.hfst \
+ generator-numerals-raw.simple.hfst \
+ generator-wordpairs-raw.simple.hfst \
+ $(EST_EXTRA_PRE_FILTERS:%=%.hfst) \
+ $(EST_EXTRA_POST_FILTERS:%=%.hfst) \
+ filters/downcase-derived_proper-strings.est.hfst
+ $(AM_V_XFST)$(PRINTF) "set flag-is-epsilon ON\n\
+ read regex \
+ $(EST_EXTRA_PRE_FILTERS:%=@\"%.hfst\" .o.) \
+ [ @\"$<\" | @\"generator-numerals-raw.simple.hfst\" \
+ | @\"generator-wordpairs-raw.simple.hfst\"] \
+ $(EST_EXTRA_POST_FILTERS:%=.o. @\"%.hfst\") \
+ ;\n\
+ define fst \n\
+ set flag-is-epsilon OFF\n\
+ read regex fst \
+ .o. @\"filters/downcase-derived_proper-strings.est.hfst\" \
+ ;\n\
+ save stack $@\n\
+ quit\n" | $(XFST_TOOL)
+
+# weights added to all analyses
+# result: lexicon-based simplex words and derivations with weights
+
+# no weight added to compound border '#' here; do it somewhere else
+# 7.01.2019 from Sjur:
+# Hfst - add weights to simplex words if using tropical-semiring fst format:
+if WITH_OFST_TROPICAL
+generator-raw-gt-desc.simple.hfst: generator-raw-gt-desc.simple.weightless.hfst
+ $(AM_V_REWEIGHT)$(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) \
+ -S '#' -a 0 --arcs-only -i $< \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/mine' -a 10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/ja' -a 10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/nu' -a 10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/mus' -a 10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/ng' -a 10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/v' -a 10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/tav' -a 10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/nud' -a 10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/mata' -a 10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/matu' -a 10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/tamatu' -a 10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/tu' -a 10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/tud' -a 10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/lik' -a 10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/line' -a 10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/ne' -a 10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/lt' -a 10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/sti' -a 10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/ini' -a 10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/m' -a 10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/im' -a 10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/nna' -a 10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/kond' -a 10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/ist' -a 10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/is' -a 10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/us' -a 10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/ti' -a 10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/lane' -a 10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/kas' -a 10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+N' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+A' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Num' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Pron' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+V' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Adv' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Interj' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+CC' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+CS' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Adp' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Pref' -a 5 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Prop' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Card' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Ord' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Comp' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Superl' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Sg' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Pl' -a 1 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Nom' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Gen' -a 1 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Par' -a 2 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Ill' -a 3 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Ine' -a 3 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Ela' -a 3 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+All' -a 3 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Ade' -a 3 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Abl' -a 3 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Tra' -a 3 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Trm' -a 3 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Ess' -a 3 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Abe' -a 3 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Com' -a 3 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Impers' -a 1 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Pers' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Prs' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Prt' -a 1 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Ind' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Cond' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Imprt' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Quot' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Sg1' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Sg2' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Sg3' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Pl1' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Pl2' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Pl3' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Aff' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Neg' -a 1 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Sup' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Inf' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Ger' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Prc' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Foc/gi' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Emph' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Pref' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Dim/ke' -a 10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+ABBR' -a 5 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+ACR' -a 5 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Usage/Rare' -a 30 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Usage/Hyp' -a 30 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Usage/NotNorm' -a 30 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Usage/CommonNotNorm' -a 30 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Use/Circ' -a 0 -A \
+ > $@
+
+# do somewhere else:
+# -S '#' -a 30 --arcs-only -i \
+# | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Guess' -a 200 -A \
+#
+
+else !WITH_OFST_TROPICAL
+
+generator-raw-gt-desc.simple.hfst: generator-raw-gt-desc.simple.weightless.hfst
+ cp $< $@
+endif !WITH_OFST_TROPICAL
+
+
+# HFST:
+# compound words = zero or more non-final parts, each followed by "#", and then a final part:
+# 1. a non-final part cannot be a form with a focus particle (+Foc/gi), so such forms are filtered out
+# 2. a non-final part may end with a hyphen (ajalooline+A+Der/minus:ajaloolis»-), which may be omitted in compounds,
+# or may have a hyphen appended (for better readability of a compound)
+# result: lexicon-based simplex words, derivations, paired words and numerals, compound words; everything with weights
+# NB! includes words where the first part is marked as +Guess, e.g. blablawords
+
+generator-raw-gt-desc.comp.hfst: generator-raw-gt-desc.simple.hfst \
+ filters/evaluate-flags.est.hfst
+ $(AM_V_XFST)$(PRINTF) "set flag-is-epsilon ON\n\
+ read regex \
+ [ \
+ [ \
+ [ \
+ ~[?* \"+Foc/gi\" ?*] .o. @\"$<\" \
+ .o. [[ \"-\" (->) 0 || » _ .#. ] | [ [..] (->) \"-\" || \\[\"-\"] _ .#. ]] \
+ ] \"#\" \
+ ]* @\"$<\" \
+ ] @\"filters/evaluate-flags.est.hfst\" \
+ ;\n\
+ save stack $@\n\
+ quit\n" | $(XFST_TOOL)
+
+
+# XFST:
+analyser-raw-gt-desc.comp.xfst: analyser-raw-gt-desc.simple.xfst \
+ filters/evaluate-flags.est.xfst
+ $(AM_V_XFST)$(PRINTF) "set flag-is-epsilon ON\n\
+ read regex \
+ [ \
+ [ \
+ [ \
+ ~[?* \"+Foc/gi\" ?*] .o. @\"$<\" \
+ .o. [[ \"-\" (->) 0 || » _ .#. ] | [ [..] (->) \"-\" || \\[\"-\"] _ .#. ]] \
+ ] \"#\" \
+ ]* @\"$<\" \
+ ] @\"filters/evaluate-flags.est.xfst\" \
+ ;\n\
+ cleanup net\n\
+ save stack $@\n\
+ quit\n" | $(XFST) $(VERBOSITY)
+
+# the vocabulary, i.e. words that might be combined with - / or otherwise
+
+# HFST:
+generator-raw-gt-desc.vocabulary.hfst: generator-raw-gt-desc.comp.hfst
+ $(AM_V_XFST)$(PRINTF) "set flag-is-epsilon ON\n\
+ read regex @\"$<\" ; \n\
+ save stack $@\n\
+ quit\n" | $(XFST_TOOL)
+
+# XEROX:
+analyser-raw-gt-desc.vocabulary.xfst: analyser-raw-gt-desc.comp.xfst \
+ analyser-wordpairs-raw.simple.xfst
+ $(AM_V_XFST)$(PRINTF) "set flag-is-epsilon ON\n\
+ read regex @\"$<\" ; \n\
+ read regex @\"analyser-wordpairs-raw.simple.xfst\" ; \n\
+ union net\n\
+ cleanup net\n\
+ save stack $@\n\
+ quit\n" | $(XFST) $(VERBOSITY)
+
+# weights added to all analyses of the vocabulary: 30 on arcs labelled '#', 200 on arcs labelled '+Guess'
+# 7.01.2019 from Sjur:
+# Hfst - add weights to compounds if using tropical-semiring fst format; otherwise the input is copied unchanged:
+if WITH_OFST_TROPICAL
+generator-raw-gt-desc.weighted.hfst: generator-raw-gt-desc.vocabulary.hfst
+ $(AM_V_REWEIGHT)$(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) \
+ -S '#' -a 30 --arcs-only -i $< \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Guess' -a 200 -A \
+ > $@
+
+else !WITH_OFST_TROPICAL
+
+generator-raw-gt-desc.weighted.hfst: generator-raw-gt-desc.vocabulary.hfst
+ cp $< $@
+endif !WITH_OFST_TROPICAL
+
+
+# weights added to all guessed simplex word analyses
+# result: guessed simplex words and derivations with weights
+
+# analogy with generator-raw-gt-desc.weighted.hfst
+# note that derived forms get a negative weight (-10), i.e. they are guessed as MORE LIKELY than simplex forms (NOTE(review): +Der/kond gets +10 below - confirm this is intended)
+# Hfst - add weights to compounds if using tropical-semiring fst format:
+if WITH_OFST_TROPICAL
+guesser-raw.weighted.hfst: guesser-raw.simple.hfst
+ $(AM_V_REWEIGHT)$(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) \
+ -S '#' -a 30 --arcs-only -i $< \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/mine' -a -10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/ja' -a -10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/nu' -a -10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/mus' -a -10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/ng' -a -10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/v' -a -10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/tav' -a -10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/nud' -a -10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/mata' -a -10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/matu' -a -10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/tamatu' -a -10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/tu' -a -10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/tud' -a -10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/lik' -a -10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/line' -a -10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/ne' -a -10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/lt' -a -10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/sti' -a -10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/ini' -a -10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/m' -a -10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/im' -a -10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/nna' -a -10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/kond' -a 10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/ist' -a -10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/is' -a -10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/us' -a -10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/ti' -a -10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/lane' -a -10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/kas' -a -10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+N' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+A' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Num' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Pron' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+V' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Adv' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Interj' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+CC' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+CS' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Adp' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Pref' -a 5 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Prop' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Card' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Ord' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Comp' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Superl' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Sg' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Pl' -a 1 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Nom' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Gen' -a 1 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Par' -a 2 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Ill' -a 3 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Ine' -a 3 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Ela' -a 3 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+All' -a 3 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Ade' -a 3 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Abl' -a 3 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Tra' -a 3 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Trm' -a 3 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Ess' -a 3 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Abe' -a 3 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Com' -a 3 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Impers' -a 1 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Pers' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Prs' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Prt' -a 1 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Ind' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Cond' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Imprt' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Quot' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Sg1' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Sg2' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Sg3' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Pl1' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Pl2' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Pl3' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Aff' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Neg' -a 1 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Sup' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Inf' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Ger' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Prc' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Foc/gi' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Emph' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Pref' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Dim/ke' -a 10 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+ABBR' -a 5 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+ACR' -a 5 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Usage/Rare' -a 30 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Usage/Hyp' -a 30 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Usage/NotNorm' -a 30 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Usage/CommonNotNorm' -a 30 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Use/Circ' -a 0 -A \
+ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Guess' -a 200 -A \
+ > $@
+
+else !WITH_OFST_TROPICAL
+
+guesser-raw.weighted.hfst: guesser-raw.simple.hfst
+ cp $< $@
+endif !WITH_OFST_TROPICAL
+
+# make the raw ones
+# HFST:
+# map the name to GT/Divvun conventions
+
+generator-raw-gt-desc.hfst: generator-raw-gt-desc.weighted.hfst
+ cp $< $@
+
+# Tokens ending with a dot (e.g. abbreviations) need special treatment by a tokeniser
+# They cannot be a part of the analyser that the tokeniser uses
+# Therefore, dot-ending stuff must be added separately to the default descriptive analyser
+
+# .dot transducer is the base for:
+# 1. -desc, -norm etc transducers
+# 2. guesser
+
+# This is the default, descriptive analyser:
+# Visible tags (ie do NOT remove):
+# - variant tags
+# - the Err/Orth tag
+# Invisible tags (ie to be removed):
+# - semantic tags
+# - homonymy tags
+
+analyser-gt-desc.dot.tmp.%: analyser-raw-gt-desc.% \
+ fst/abbrevdot.tmp.% \
+ filters/remove-area-tags.% \
+ filters/remove-dialect-tags.% \
+ filters/remove-number-string-tags.% \
+ filters/remove-usage-tags.% \
+ filters/remove-semantic-tags.% \
+ filters/remove-hyphenation-marks.% \
+ filters/remove-infl_deriv-borders.% \
+ filters/remove-word-boundary.% \
+ filters/remove-orthography-tags.% \
+ filters/remove-Orth_IPA-strings.% \
+ filters/remove-orig_lang-tags.% \
+ filters/remove-Use_GC-strings.% \
+ filters/remove-Use_minusGC-tags.% \
+ filters/remove-Use_minus_PMatch-tags.% \
+ filters/remove-Use_PMatch-strings.% \
+ filters/remove-mwe-tags.% \
+ orthography/inituppercase.compose.% \
+ orthography/allcaps.compose.% \
+ orthography/spellrelax.compose.% \
+ $(GLT_DOWNCASE_FILTER)
+ $(AM_V_XFST_TOOL)$(PRINTF) "read regex \
+ @\"filters/remove-area-tags.$*\" \
+ .o. @\"filters/remove-dialect-tags.$*\" \
+ .o. @\"filters/remove-number-string-tags.$*\" \
+ .o. @\"filters/remove-usage-tags.$*\" \
+ .o. @\"filters/remove-semantic-tags.$*\" \
+ .o. @\"filters/remove-orig_lang-tags.$*\" \
+ .o. @\"filters/remove-orthography-tags.$*\" \
+ .o. @\"filters/remove-Orth_IPA-strings.$*\" \
+ .o. @\"filters/remove-Use_minus_PMatch-tags.$*\" \
+ .o. @\"filters/remove-Use_GC-strings.$*\" \
+ .o. @\"filters/remove-Use_minusGC-tags.$*\" \
+ .o. @\"filters/remove-Use_PMatch-strings.$*\" \
+ .o. @\"filters/remove-mwe-tags.$*\" \
+ .o. [@\"$<\" | @\"fst/abbrevdot.tmp.$*\"] \
+ $(GLT_DOWNCASE_COMPOSE) \
+ .o. @\"filters/remove-hyphenation-marks.$*\" \
+ .o. @\"filters/remove-infl_deriv-borders.$*\" \
+ .o. @\"filters/remove-word-boundary.$*\" \
+ ; \n\
+ define fst \n\
+ set flag-is-epsilon ON\n\
+ set encode-weights ON\n\
+ read regex fst \
+ .o. @\"orthography/inituppercase.compose.$*\" \
+ .o. @\"orthography/allcaps.compose.$*\" \
+ .o. @\"orthography/spellrelax.compose.$*\" \
+ ; \n\
+ save stack $@\n\
+ quit\n" | $(XFST_TOOL)
+
+# This is the default, descriptive generating transducer.
+generator-gt-desc.dot.tmp.%: analyser-raw-gt-desc.% \
+ fst/abbrevdot.tmp.% \
+ filters/make-optional-transitivity-tags.% \
+ filters/make-optional-homonymy-tags.% \
+ filters/make-optional-hyph-tags.% \
+ filters/make-optional-variant-tags.% \
+ filters/make-optional-semantic-tags.% \
+ filters/make-optional-error-tags.% \
+ filters/make-optional-adv_comp-tags.% \
+ filters/make-optional-orig_lang-tags.% \
+ filters/remove-area-tags.% \
+ filters/remove-dialect-tags.% \
+ filters/remove-hyphenation-marks.% \
+ filters/remove-infl_deriv-borders.% \
+ filters/remove-word-boundary.% \
+ filters/remove-number-string-tags.% \
+ filters/remove-orthography-tags.% \
+ filters/remove-Orth_IPA-strings.% \
+ filters/remove-usage-tags.% \
+ filters/remove-Use_GC-strings.% \
+ filters/remove-Use_minusGC-tags.% \
+ filters/remove-Use_minus_PMatch-tags.% \
+ filters/remove-Use_PMatch-strings.% \
+ filters/remove-mwe-tags.% \
+ $(GLT_DOWNCASE_FILTER)
+ $(AM_V_XFST_TOOL)$(PRINTF) "read regex \
+ @\"filters/make-optional-transitivity-tags.$*\" \
+ .o. @\"filters/make-optional-homonymy-tags.$*\" \
+ .o. @\"filters/make-optional-hyph-tags.$*\" \
+ .o. @\"filters/make-optional-variant-tags.$*\" \
+ .o. @\"filters/make-optional-semantic-tags.$*\" \
+ .o. @\"filters/make-optional-error-tags.$*\" \
+ .o. @\"filters/make-optional-adv_comp-tags.$*\" \
+ .o. @\"filters/make-optional-orig_lang-tags.$*\" \
+ .o. @\"filters/remove-area-tags.$*\" \
+ .o. @\"filters/remove-dialect-tags.$*\" \
+ .o. @\"filters/remove-number-string-tags.$*\" \
+ .o. @\"filters/remove-usage-tags.$*\" \
+ .o. @\"filters/remove-orthography-tags.$*\" \
+ .o. @\"filters/remove-Orth_IPA-strings.$*\" \
+ .o. @\"filters/remove-Use_minus_PMatch-tags.$*\" \
+ .o. @\"filters/remove-Use_GC-strings.$*\" \
+ .o. @\"filters/remove-Use_minusGC-tags.$*\" \
+ .o. @\"filters/remove-Use_PMatch-strings.$*\" \
+ .o. @\"filters/remove-mwe-tags.$*\" \
+ .o. [@\"$<\" | @\"fst/abbrevdot.tmp.$*\"] \
+ $(GLT_DOWNCASE_COMPOSE) \
+ .o. @\"filters/remove-hyphenation-marks.$*\" \
+ .o. @\"filters/remove-infl_deriv-borders.$*\" \
+ .o. @\"filters/remove-word-boundary.$*\" \
+ ;\n\
+ save stack $@\n\
+ quit\n" | $(XFST_TOOL)
+
+# override the default descriptive analyser and generator
+# result: lexicon-based analyses
+
+analyser-gt-desc.tmp.hfst: analyser-gt-desc.dot.tmp.hfst \
+ filters/remove-guessed-forms.est.hfst
+ $(AM_V_XFST_TOOL)$(PRINTF) "read regex \
+ @\"filters/remove-guessed-forms.est.hfst\" \
+ .o. @\"$<\" \
+ ;\n\
+ save stack $@\n\
+ quit\n" | $(XFST_TOOL)
+
+generator-gt-desc.tmp.hfst: generator-gt-desc.dot.tmp.hfst \
+ filters/remove-guessed-forms.est.hfst
+ $(AM_V_XFST_TOOL)$(PRINTF) "read regex \
+ @\"filters/remove-guessed-forms.est.hfst\" \
+ .o. @\"$<\" \
+ ;\n\
+ save stack $@\n\
+ quit\n" | $(XFST_TOOL)
+
+
+#--- begin guesser-related ad hoc
+
+# substitute placeholders with fsts containing real symbols;
+# relax hyphen and apostrophe writing conventions
+# result: lexicon-based analyses plus blablawords
+# this will be later unioned with simplex word guesser to result in a complete guesser
+
+analyser-gt-descguess.hfst: analyser-gt-desc.dot.tmp.hfst \
+ fst/substitute_blockcap.xfscript \
+ fst/substitutions.xfscript \
+ orthography/punctrelax.compose.hfst
+ $(AM_V_XFST_TOOL)$(PRINTF) "set encode-weights ON\n\
+ read regex @\"$<\";\n\
+ source fst/substitute_blockcap.xfscript\n\
+ source fst/substitutions.xfscript\n\
+ define fst\n\
+ read regex fst \
+ .o. @\"orthography/punctrelax.compose.hfst\" \
+ ;\n\
+ $(INVERT_HFST)\
+ save stack $@\n\
+ quit\n" | $(XFST_TOOL)
+
+# ... and generators:
+# substitute placeholders with fsts containing real symbols
-pair_initial.tmp.lexc: pair_initial.lexc $(GT_LEXC_ROOT) $(EST_AFFIX_FILES:%=%.lexc)
- $(AM_V_GEN)cat $(GT_LEXC_ROOT) | sed '/^ *LEXICON *Root/,$$d' > $@ && \
- cat $< >> $@ && \
- cat $(EST_AFFIX_FILES:%=%.lexc) | sed 's/@.\.[^@]*@//g' >> $@
-
-num_initial.tmp.lexc: num_initial.lexc $(GT_LEXC_ROOT) $(EST_AFFIX_FILES:%=%.lexc)
- $(AM_V_GEN)cat $(GT_LEXC_ROOT) | sed '/^ *LEXICON *Root/,$$d' > $@ && \
- cat $< >> $@ && \
- cat $(EST_AFFIX_FILES:%=%.lexc) | sed 's/@.\.[^@]*@//g' >> $@
-
-pair_final.tmp.lexc: pair_final.lexc $(GT_LEXC_ROOT) $(EST_AFFIX_FILES:%=%.lexc)
- $(AM_V_GEN)cat $(GT_LEXC_ROOT) | sed '/^ *LEXICON *Root/,$$d' > $@ && \
- cat $< >> $@ && \
- cat $(EST_AFFIX_FILES:%=%.lexc) >> $@
-
-num_final.tmp.lexc: num_final.lexc $(GT_LEXC_ROOT) $(EST_AFFIX_FILES:%=%.lexc)
- $(AM_V_GEN)cat $(GT_LEXC_ROOT) | sed '/^ *LEXICON *Root/,$$d' > $@ && \
- cat $< >> $@ && \
- cat $(EST_AFFIX_FILES:%=%.lexc) >> $@
-
-# abbreviations with a dot ...
-# ... should be kept separately because of tokenisation issues
-# copy the abbrevs that can have a final dot and format them appropriately:
-# attach a final dot and continuation lexicon ;
-# and add flag diacritics to prevent these abbreviations to be part of some compound word
-
-abbrevdot.tmp.lexc: stems/abbreviations.lexc $(GT_LEXC_ROOT) $(EST_AFFIX_FILES:%=%.lexc)
- $(AM_V_GEN)cat $(GT_LEXC_ROOT) | sed '/^ *LEXICON *Root/,$$d' > $@ && \
- echo 'LEXICON Root' >> $@ && \
- echo ' @D.Part@@P.Part.Bad@ DABBR ;' >> $@ && \
- echo 'LEXICON DABBR' >> $@ && \
- $(AM_V_GEN)cat $< | grep '^ *!.*DOTABBR' | sed 's/^ *!//' >> $@ && \
- $(AM_V_GEN)cat $< | grep 'may also end with a dot' | \
- sed 's/:\([^ ]*\) *[^ ]* *; *! *may also end with a dot/:\1 DOTABBR ;/' >> $@ && \
- cat $(EST_AFFIX_FILES:%=%.lexc) >> $@
-
-
-# guesser:
-# re-using root.lexc and affixes
-guesser-simplex-nouns.tmp.lexc: guesser-simplex-nouns.lexc $(GT_LEXC_ROOT) $(EST_AFFIX_FILES:%=%.lexc)
- $(AM_V_GEN)cat $(GT_LEXC_ROOT) | sed '/^ *LEXICON *Root/,$$d' > $@ && \
- cat $< >> $@ && \
- cat $(EST_AFFIX_FILES:%=%.lexc) >> $@
-
-guesser-names.tmp0.lexc: guesser-simplex-nouns.lexc $(GT_LEXC_ROOT) $(EST_AFFIX_FILES:%=%.lexc)
- $(AM_V_GEN)cat $(GT_LEXC_ROOT) | sed '/^ *LEXICON *Root/,$$d' > $@ && \
- cat $< >> $@ && \
- cat $(EST_AFFIX_FILES:%=%.lexc) >> $@
-
-### END: Local processing: ###
-
-####### Other targets: ###########
-# Clean: add local clean targets on separate lines, so that the first line can
-# easily get updates from the template dir through svn merge.
-clean-local:
- -rm -f *.all.* *fst *.foma *.script generated_files/*.lexc lexicon.*
- -rm -f url.lexc *.tmp*
- -rm -f *.relabel lexicon-tags.* lexicon-sigma.*
-
-include $(srcdir)/Makefile.modifications-phon.am
-include $(top_srcdir)/../giella-core/am-shared/src-morphology-dir-include.am
-
-# vim: set ft=automake:
+generator-gt-descguess.hfst: generator-gt-desc.dot.tmp.hfst \
+ fst/substitute_blockcap.xfscript \
+ fst/substitutions.xfscript
+ $(AM_V_XFST_TOOL)$(PRINTF) "set encode-weights ON\n\
+ read regex @\"$<\";\n\
+ source fst/substitute_blockcap.xfscript\n\
+ source fst/substitutions.xfscript\n\
+ $(INVERT_XFST)$(INVERT_FOMA)\
+ save stack $@\n\
+ quit\n" | $(XFST_TOOL)
+
+# --- end guesser-related ad hoc
+
+# override the default normative analyser and generator
+# (is it necessary actually? where are they used ?)
+analyser-gt-norm.tmp.hfst: analyser-gt-desc.tmp.hfst \
+ filters/remove-nospell-words.est.hfst \
+ filters/remove-NotNorm-wordforms.est.hfst
+ $(AM_V_XFST_TOOL)$(PRINTF) "set flag-is-epsilon ON\n\
+ read regex \
+ @\"filters/remove-nospell-words.est.hfst\" \
+ .o. @\"filters/remove-NotNorm-wordforms.est.hfst\" \
+ .o. @\"$<\" \
+ ;\n\
+ save stack $@\n\
+ quit\n" | $(XFST_TOOL)
+
+generator-gt-norm.tmp.hfst: generator-gt-desc.tmp.hfst \
+ filters/remove-nospell-words.est.hfst \
+ filters/remove-NotNorm-wordforms.est.hfst
+ $(AM_V_XFST_TOOL)$(PRINTF) "set flag-is-epsilon ON\n\
+ read regex \
+ @\"filters/remove-nospell-words.est.hfst\" \
+ .o. @\"filters/remove-NotNorm-wordforms.est.hfst\" \
+ .o. @\"$<\" \
+ ;\n\
+ save stack $@\n\
+ quit\n" | $(XFST_TOOL)
+
+
+# HFST:
+# simplex word guesser;
+# result: guessed simplex words and derivations with weights
+# (surface side without phonotactics symbols)
+generator-raw-gt-guess.hfst: guesser-raw.weighted.hfst \
+ filters/remove-hyphenation-marks.hfst \
+ filters/remove-infl_deriv-borders.hfst \
+ filters/remove-word-boundary.hfst
+ $(AM_V_XFST_TOOL)$(PRINTF) "read regex \
+ @\"$<\" \
+ .o. @\"filters/remove-hyphenation-marks.hfst\" \
+ .o. @\"filters/remove-infl_deriv-borders.hfst\" \
+ .o. @\"filters/remove-word-boundary.hfst\" \
+ ;\n\
+ save stack $@\n\
+ quit\n" | $(XFST_TOOL)
+
+# HFST:
+# simplex word guesser, enlarged to cover standard orthography
+# upcasing added as a union of inituppercase and allcaps (analyser-gt-desc composes them one after the other)
+# spellrelax is composed after the upcasing; the resulting net is then inverted (generator -> analyser)
+analyser-raw-gt-guess.hfst: generator-raw-gt-guess.hfst \
+ orthography/inituppercase.compose.hfst \
+ orthography/allcaps.compose.hfst \
+ orthography/spellrelax.compose.hfst
+ $(AM_V_XFST_TOOL)$(PRINTF) "set flag-is-epsilon ON\n\
+ read regex \
+ @\"$<\" \
+ .o. [@\"orthography/inituppercase.compose.hfst\" \
+ | @\"orthography/allcaps.compose.hfst\" ] \
+ .o. @\"orthography/spellrelax.compose.hfst\" \
+ ;\n\
+ invert net \n\
+ save stack $@\n\
+ quit\n" | $(XFST_TOOL)
+
+
+# XEROX:
+# probably garbage anyway
+analyser-raw-gt-desc.xfst: analyser-raw-gt-desc.vocabulary.xfst
+ cp $< $@
+
+# complete guesser
+# includes
+# 1) phonological patterns for simplex words (and names) and their derivations
+# 2) all the lexicon-based words (simplex, compound, derived; paired)
+# 3) blablawords, i.e. words where the last component gets an analysis, and the rest is +Guess
+
+# experimental (orig. Estonian: katsetus)
+generator-gt-guess.hfst: generator-gt-descguess.hfst \
+ generator-raw-gt-guess.hfst
+ $(AM_V_XFST_TOOL)$(PRINTF) "set encode-weights ON\n\
+ read regex \
+ [[ @\"$<\"] | @\"generator-raw-gt-guess.hfst\"] \
+ ; \n\
+ save stack $@\n\
+ quit\n" | $(XFST_TOOL)
+
+analyser-gt-guess.hfst: analyser-gt-descguess.hfst \
+ analyser-raw-gt-guess.hfst
+ $(AM_V_XFST_TOOL)$(PRINTF) "set encode-weights ON\n\
+ read regex \
+ [[ @\"$<\" ] | @\"analyser-raw-gt-guess.hfst\"] \
+ ; \n\
+ save stack $@\n\
+ quit\n" | $(XFST_TOOL)
+
+
+# XEROX:
+# not implemented...
+#
+
+
+##################################################################
+#### END: Add local processing instructions ABOVE this line ######
+##################################################################
+include $(top_srcdir)/../giella-core/am-shared/src-fst-dir-include.am
diff --git a/src/filters/.gitignore b/src/fst/filters/.gitignore
similarity index 100%
rename from src/filters/.gitignore
rename to src/fst/filters/.gitignore
diff --git a/src/filters/Makefile.am b/src/fst/filters/Makefile.am
similarity index 64%
rename from src/filters/Makefile.am
rename to src/fst/filters/Makefile.am
index 643fbf9e..4724b67d 100644
--- a/src/filters/Makefile.am
+++ b/src/fst/filters/Makefile.am
@@ -27,23 +27,7 @@ GIELLA_FILTER_LOCAL_REGEX_SRCS=\
rename-POS_before_Der-tags.regex
# List any local filter xfscript files here:
-GIELLA_FILTER_LOCAL_XFSCRIPT_SRCS=reorder-tags.est.xfscript \
- remove-sg-forms.est.xfscript \
- remove-pl-forms.est.xfscript \
- remove-sg-nom-forms.est.xfscript \
- remove-non-gi-forms.est.xfscript \
- remove-usage-tags.est.xfscript \
- remove-nospell-words.est.xfscript \
- remove-NotNorm-wordforms.est.xfscript \
- modify-derivations.est.xfscript \
- block-derivations.est.xfscript \
- block-compounds.est.xfscript \
- wordpair-filter.est.xfscript \
- numeral-filter.est.xfscript \
- evaluate-flags.est.xfscript \
- downcase-derived_proper-strings.est.xfscript \
- upcase-guessed-names.est.xfscript \
- remove-guessed-forms.est.xfscript
+GIELLA_FILTER_LOCAL_XFSCRIPT_SRCS=
# List any local filter lexc files here:
GIELLA_FILTER_LOCAL_LEXC_SRCS=
diff --git a/src/filters/block-compounds.est.xfscript b/src/fst/filters/block-compounds.est.xfscript
similarity index 100%
rename from src/filters/block-compounds.est.xfscript
rename to src/fst/filters/block-compounds.est.xfscript
diff --git a/src/filters/block-derivations.est.xfscript b/src/fst/filters/block-derivations.est.xfscript
similarity index 100%
rename from src/filters/block-derivations.est.xfscript
rename to src/fst/filters/block-derivations.est.xfscript
diff --git a/src/filters/downcase-derived_proper-strings.est.xfscript b/src/fst/filters/downcase-derived_proper-strings.est.xfscript
similarity index 100%
rename from src/filters/downcase-derived_proper-strings.est.xfscript
rename to src/fst/filters/downcase-derived_proper-strings.est.xfscript
diff --git a/src/filters/downcase_UCletters.regex b/src/fst/filters/downcase_UCletters.regex
similarity index 100%
rename from src/filters/downcase_UCletters.regex
rename to src/fst/filters/downcase_UCletters.regex
diff --git a/src/filters/evaluate-flags.est.xfscript b/src/fst/filters/evaluate-flags.est.xfscript
similarity index 100%
rename from src/filters/evaluate-flags.est.xfscript
rename to src/fst/filters/evaluate-flags.est.xfscript
diff --git a/src/filters/modify-derivations.est.xfscript b/src/fst/filters/modify-derivations.est.xfscript
similarity index 100%
rename from src/filters/modify-derivations.est.xfscript
rename to src/fst/filters/modify-derivations.est.xfscript
diff --git a/src/filters/numeral-filter.est.xfscript b/src/fst/filters/numeral-filter.est.xfscript
similarity index 100%
rename from src/filters/numeral-filter.est.xfscript
rename to src/fst/filters/numeral-filter.est.xfscript
diff --git a/src/filters/remove-DNorm-tags.regex b/src/fst/filters/remove-DNorm-tags.regex
similarity index 100%
rename from src/filters/remove-DNorm-tags.regex
rename to src/fst/filters/remove-DNorm-tags.regex
diff --git a/src/filters/remove-NotNorm-wordforms.est.xfscript b/src/fst/filters/remove-NotNorm-wordforms.est.xfscript
similarity index 100%
rename from src/filters/remove-NotNorm-wordforms.est.xfscript
rename to src/fst/filters/remove-NotNorm-wordforms.est.xfscript
diff --git a/src/filters/remove-derivation-position-tags.regex b/src/fst/filters/remove-derivation-position-tags.regex
similarity index 100%
rename from src/filters/remove-derivation-position-tags.regex
rename to src/fst/filters/remove-derivation-position-tags.regex
diff --git a/src/filters/remove-guessed-forms.est.xfscript b/src/fst/filters/remove-guessed-forms.est.xfscript
similarity index 100%
rename from src/filters/remove-guessed-forms.est.xfscript
rename to src/fst/filters/remove-guessed-forms.est.xfscript
diff --git a/src/filters/remove-non-gi-forms.est.xfscript b/src/fst/filters/remove-non-gi-forms.est.xfscript
similarity index 100%
rename from src/filters/remove-non-gi-forms.est.xfscript
rename to src/fst/filters/remove-non-gi-forms.est.xfscript
diff --git a/src/filters/remove-norm-comp-tags.regex b/src/fst/filters/remove-norm-comp-tags.regex
similarity index 100%
rename from src/filters/remove-norm-comp-tags.regex
rename to src/fst/filters/remove-norm-comp-tags.regex
diff --git a/src/filters/remove-nospell-words.est.xfscript b/src/fst/filters/remove-nospell-words.est.xfscript
similarity index 100%
rename from src/filters/remove-nospell-words.est.xfscript
rename to src/fst/filters/remove-nospell-words.est.xfscript
diff --git a/src/filters/remove-pl-forms.est.xfscript b/src/fst/filters/remove-pl-forms.est.xfscript
similarity index 100%
rename from src/filters/remove-pl-forms.est.xfscript
rename to src/fst/filters/remove-pl-forms.est.xfscript
diff --git a/src/filters/remove-sg-forms.est.xfscript b/src/fst/filters/remove-sg-forms.est.xfscript
similarity index 100%
rename from src/filters/remove-sg-forms.est.xfscript
rename to src/fst/filters/remove-sg-forms.est.xfscript
diff --git a/src/filters/remove-sg-nom-forms.est.xfscript b/src/fst/filters/remove-sg-nom-forms.est.xfscript
similarity index 100%
rename from src/filters/remove-sg-nom-forms.est.xfscript
rename to src/fst/filters/remove-sg-nom-forms.est.xfscript
diff --git a/src/filters/remove-usage-tags.est.xfscript b/src/fst/filters/remove-usage-tags.est.xfscript
similarity index 100%
rename from src/filters/remove-usage-tags.est.xfscript
rename to src/fst/filters/remove-usage-tags.est.xfscript
diff --git a/src/filters/rename-POS_before_Der-tags.regex b/src/fst/filters/rename-POS_before_Der-tags.regex
similarity index 100%
rename from src/filters/rename-POS_before_Der-tags.regex
rename to src/fst/filters/rename-POS_before_Der-tags.regex
diff --git a/src/filters/reorder-tags.est.xfscript b/src/fst/filters/reorder-tags.est.xfscript
similarity index 100%
rename from src/filters/reorder-tags.est.xfscript
rename to src/fst/filters/reorder-tags.est.xfscript
diff --git a/src/filters/upcase-guessed-names.est.xfscript b/src/fst/filters/upcase-guessed-names.est.xfscript
similarity index 100%
rename from src/filters/upcase-guessed-names.est.xfscript
rename to src/fst/filters/upcase-guessed-names.est.xfscript
diff --git a/src/filters/upcase-guessed_proper-strings.est.xfscript b/src/fst/filters/upcase-guessed_proper-strings.est.xfscript
similarity index 100%
rename from src/filters/upcase-guessed_proper-strings.est.xfscript
rename to src/fst/filters/upcase-guessed_proper-strings.est.xfscript
diff --git a/src/filters/wordpair-filter.est.xfscript b/src/fst/filters/wordpair-filter.est.xfscript
similarity index 100%
rename from src/filters/wordpair-filter.est.xfscript
rename to src/fst/filters/wordpair-filter.est.xfscript
diff --git a/src/fst/morphology/Makefile.am b/src/fst/morphology/Makefile.am
new file mode 100644
index 00000000..fdd0c51d
--- /dev/null
+++ b/src/fst/morphology/Makefile.am
@@ -0,0 +1,202 @@
+## Process this file with automake to produce Makefile.in
+
+## Copyright (C) 2011 Samediggi
+
+## This program is free software: you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation, either version 3 of the License, or
+## (at your option) any later version.
+
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+
+## You should have received a copy of the GNU General Public License
+## along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+# Add language-specific flags for hfst-lexc compilation here:
+if HAVE_SHARED_COMMON
+HFST_LEXC_LOCAL_FLAGS= # --Werror # uncomment if lexc is good enough
+else
+HFST_LEXC_LOCAL_FLAGS= # No --Werror if deps are missing !
+endif
+
+
+####### Morphology source file defs: ########
+
+# Set this to name of lexc file containing Multichar_Symbols and LEXICON Root
+GT_LEXC_ROOT=$(srcdir)/root.lexc
+
+# Set this to the names of all regular lexc source files:
+GT_LEXC_SRCS_L1_L2=\
+ stems/abbreviations.lexc \
+ stems/adjectives.lexc \
+ stems/noninflecting_adjectives.lexc \
+ stems/comparative_adjectives.lexc \
+ stems/superlative_adjectives.lexc \
+ stems/adpositions.lexc \
+ stems/adverbs.lexc \
+ stems/conjunctions.lexc \
+ stems/genitive_attributes.lexc \
+ stems/interjections.lexc \
+ stems/nouns.lexc \
+ stems/cardinalnumerals.lexc \
+ stems/ordinalnumerals.lexc \
+ stems/pronouns.lexc \
+ stems/propernouns.lexc \
+ stems/verbs.lexc \
+ stems/noninflecting_verbs.lexc \
+ stems/prefixes.lexc \
+ stems/final_components.lexc \
+ stems/numbers.lexc \
+ stems/acronyms.lexc \
+ stems/symbol_strings.lexc \
+ affixes/regular_declinations.lexc \
+ affixes/exceptional_declinations.lexc \
+ affixes/verbs.lexc \
+ affixes/gi.lexc
+
+# If you are building an error-detecting L2 analyser, specify the lexc files
+# that differ between the regular L1 and the L2 analysers below, in L1 and
+# L2 respectively. L2 files must end in "*-L2.lexc". See SME for an example.
+L1=
+
+L2=
+
+GT_LEXC_SRCS=\
+ $(GT_LEXC_SRCS_L1_L2) \
+ $(L1)
+
+GT_LEXC_L2_SRCS=\
+ $(GT_LEXC_SRCS_L1_L2) \
+ $(L2)
+
+# Set this to the names of all generated lexc files, if any
+GENERATED_LEXC_SRCS=generated_files/mul-$(GLANG)-punctuation.lexc \
+ generated_files/mul-$(GLANG)-symbols.lexc
+
+# change handling of shared lexical data here:
+if HAVE_SHARED_COMMON
+.generated/url.tmp.lexc: $(gt_SHARED_common)/src/fst/url.lexc
+	$(MAKE) $(GENDIR)
+	$(AM_V_CP)cp -f $< $@
+
+generated_files/mul-$(GLANG)-%.lexc: $(gt_SHARED_common)/src/fst/stems/%.lexc
+	$(AM_V_at)$(MKDIR_P) generated_files
+	$(AM_V_CP)cp -f $< $@
+else
+# "safe" fallback (compiles but you miss everything); targets match GENERATED_LEXC_SRCS
+.generated/url.tmp.lexc:
+	$(MAKE) $(GENDIR)
+	{ echo "LEXICON Root"; echo "< h t t p (s) %: %/ %/ ?*> # ;"; } > $@
+
+generated_files/mul-$(GLANG)-%.lexc:
+	$(AM_V_at)$(MKDIR_P) generated_files
+	echo "! Missing shared common data" > $@
+endif
+# add other lexical shared data handling here
+
+# Set this to the names of all source xml files, if any
+GT_XML_SRCS=
+
+# Define any additional lexc sources here (compiled on their own):
+GT_LOCAL_SRCS=\
+ pair_initial.tmp.lexc \
+ pair_final.tmp.lexc \
+ num_initial.tmp.lexc \
+ num_final.tmp.lexc \
+ abbrevdot.tmp.lexc \
+ guesser-simplex-nouns.tmp.lexc \
+ guesser-names.tmp0.lexc
+
+# guesser-derivations.tmp.lexc
+# punctuation.tmp.lexc
+# acronyms.tmp.lexc
+
+# Define local xfscripts here:
+GT_LOCAL_XFSCRIPT_SRCS=
+
+# Define here any additional sources just included in the distro:
+GT_DISTRO_SRCS=
+
+### BEGIN: Local processing: ###
+EST_AFFIX_FILES=$(srcdir)/affixes/regular_declinations \
+ $(srcdir)/affixes/exceptional_declinations \
+ $(srcdir)/affixes/verbs \
+ $(srcdir)/affixes/gi
+
+# make the parts inflect
+# by re-using root.lexc and affixes, and omitting all the stem lexicons
+# ... and remove the flag diacritics from initial parts (why? because otherwise the filter that
+# puts the initial and final part together doesn't work ?)
+
+pair_initial.tmp.lexc: pair_initial.lexc $(GT_LEXC_ROOT) $(EST_AFFIX_FILES:%=%.lexc)
+ $(AM_V_GEN)cat $(GT_LEXC_ROOT) | sed '/^ *LEXICON *Root/,$$d' > $@ && \
+ cat $< >> $@ && \
+ cat $(EST_AFFIX_FILES:%=%.lexc) | sed 's/@.\.[^@]*@//g' >> $@
+
+num_initial.tmp.lexc: num_initial.lexc $(GT_LEXC_ROOT) $(EST_AFFIX_FILES:%=%.lexc)
+ $(AM_V_GEN)cat $(GT_LEXC_ROOT) | sed '/^ *LEXICON *Root/,$$d' > $@ && \
+ cat $< >> $@ && \
+ cat $(EST_AFFIX_FILES:%=%.lexc) | sed 's/@.\.[^@]*@//g' >> $@
+
+pair_final.tmp.lexc: pair_final.lexc $(GT_LEXC_ROOT) $(EST_AFFIX_FILES:%=%.lexc)
+ $(AM_V_GEN)cat $(GT_LEXC_ROOT) | sed '/^ *LEXICON *Root/,$$d' > $@ && \
+ cat $< >> $@ && \
+ cat $(EST_AFFIX_FILES:%=%.lexc) >> $@
+
+num_final.tmp.lexc: num_final.lexc $(GT_LEXC_ROOT) $(EST_AFFIX_FILES:%=%.lexc)
+ $(AM_V_GEN)cat $(GT_LEXC_ROOT) | sed '/^ *LEXICON *Root/,$$d' > $@ && \
+ cat $< >> $@ && \
+ cat $(EST_AFFIX_FILES:%=%.lexc) >> $@
+
+# abbreviations with a dot ...
+# ... should be kept separately because of tokenisation issues
+# copy the abbrevs that can have a final dot and format them appropriately:
+# attach a final dot and continuation lexicon ;
+# and add flag diacritics to prevent these abbreviations from becoming part of a compound word
+
+abbrevdot.tmp.lexc: stems/abbreviations.lexc $(GT_LEXC_ROOT) $(EST_AFFIX_FILES:%=%.lexc)
+	$(AM_V_GEN)cat $(GT_LEXC_ROOT) | sed '/^ *LEXICON *Root/,$$d' > $@ && \
+	 echo 'LEXICON Root' >> $@ && \
+	 echo ' @D.Part@@P.Part.Bad@ DABBR ;' >> $@ && \
+	 echo 'LEXICON DABBR' >> $@ && \
+	 cat $< | grep '^ *!.*DOTABBR' | sed 's/^ *!//' >> $@ && \
+	 cat $< | grep 'may also end with a dot' | \
+	 sed 's/:\([^ ]*\) *[^ ]* *; *! *may also end with a dot/:\1 DOTABBR ;/' >> $@ && \
+	 cat $(EST_AFFIX_FILES:%=%.lexc) >> $@
+
+
+
+# guesser:
+# re-using root.lexc and affixes
+guesser-simplex-nouns.tmp.lexc: guesser-simplex-nouns.lexc $(GT_LEXC_ROOT) $(EST_AFFIX_FILES:%=%.lexc)
+ $(AM_V_GEN)cat $(GT_LEXC_ROOT) | sed '/^ *LEXICON *Root/,$$d' > $@ && \
+ cat $< >> $@ && \
+ cat $(EST_AFFIX_FILES:%=%.lexc) >> $@
+
+guesser-names.tmp0.lexc: guesser-simplex-nouns.lexc $(GT_LEXC_ROOT) $(EST_AFFIX_FILES:%=%.lexc)
+ $(AM_V_GEN)cat $(GT_LEXC_ROOT) | sed '/^ *LEXICON *Root/,$$d' > $@ && \
+ cat $< >> $@ && \
+ cat $(EST_AFFIX_FILES:%=%.lexc) >> $@
+
+### List additional targets in the following variable, for build targets not
+### covered by other means. This comes ***in addition to*** what you can do by
+### just targeting lexicon.*: lexicon.tmp.*, and is useful if you want to build
+### separate fst's that need further treatment in the src/ dir. See the language
+### gle/ for an example of how this is used.
+
+GIELLA_LOCAL_TARGETS=
+
+### END: Local processing: ###
+
+####### Other targets: ###########
+# Clean: add local clean targets on separate lines, so that the first line can
+# easily get updates from the template dir through svn merge.
+
+include $(srcdir)/Makefile.modifications-local.am
+include $(srcdir)/Makefile.modifications-phon.am
+include $(top_srcdir)/../giella-core/am-shared/src-morphology-dir-include.am
+
+# vim: set ft=automake:
diff --git a/src/fst/morphology/Makefile.modifications-local.am b/src/fst/morphology/Makefile.modifications-local.am
new file mode 100644
index 00000000..f60bc693
--- /dev/null
+++ b/src/fst/morphology/Makefile.modifications-local.am
@@ -0,0 +1,20 @@
+## Process this file with automake to produce Makefile.in
+
+## Copyright (C) 2011 Samediggi
+
+## This program is free software: you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation, either version 3 of the License, or
+## (at your option) any later version.
+
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+
+## You should have received a copy of the GNU General Public License
+## along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+# Add local build rules and shared here...
+
+
diff --git a/src/fst/morphology/Makefile.modifications-phon.am b/src/fst/morphology/Makefile.modifications-phon.am
new file mode 100644
index 00000000..8c872aee
--- /dev/null
+++ b/src/fst/morphology/Makefile.modifications-phon.am
@@ -0,0 +1,26 @@
+## Process this file with automake to produce Makefile.in
+
+## Copyright (C) 2011 Samediggi
+
+## This program is free software: you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation, either version 3 of the License, or
+## (at your option) any later version.
+
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+
+## You should have received a copy of the GNU General Public License
+## along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+####### Source file defs: ########
+
+#! @param GT_PHONOLOGY_MAIN required, the source of phonology
+#! @param GT_PHONOLOGY_SUPPLEMENTS optional, other sources to distribute and
+#! compile for other things
+GT_PHONOLOGY_MAIN=phonology.twolc
+GT_PHONOLOGY_SUPPLEMENTS=
+
+# vim: set ft=automake:
diff --git a/src/fst/affixes/exceptional_declinations.lexc b/src/fst/morphology/affixes/exceptional_declinations.lexc
similarity index 100%
rename from src/fst/affixes/exceptional_declinations.lexc
rename to src/fst/morphology/affixes/exceptional_declinations.lexc
diff --git a/src/fst/affixes/gi.lexc b/src/fst/morphology/affixes/gi.lexc
similarity index 100%
rename from src/fst/affixes/gi.lexc
rename to src/fst/morphology/affixes/gi.lexc
diff --git a/src/fst/affixes/regular_declinations.lexc b/src/fst/morphology/affixes/regular_declinations.lexc
similarity index 100%
rename from src/fst/affixes/regular_declinations.lexc
rename to src/fst/morphology/affixes/regular_declinations.lexc
diff --git a/src/fst/affixes/verbs.lexc b/src/fst/morphology/affixes/verbs.lexc
similarity index 100%
rename from src/fst/affixes/verbs.lexc
rename to src/fst/morphology/affixes/verbs.lexc
diff --git a/src/fst/generated_files/00README.txt b/src/fst/morphology/generated_files/00README.txt
similarity index 100%
rename from src/fst/generated_files/00README.txt
rename to src/fst/morphology/generated_files/00README.txt
diff --git a/src/fst/incoming/00README.txt b/src/fst/morphology/incoming/00README.txt
similarity index 100%
rename from src/fst/incoming/00README.txt
rename to src/fst/morphology/incoming/00README.txt
diff --git a/src/fst/phonology.twolc b/src/fst/morphology/phonology.twolc
similarity index 100%
rename from src/fst/phonology.twolc
rename to src/fst/morphology/phonology.twolc
diff --git a/src/fst/root.lexc b/src/fst/morphology/root.lexc
similarity index 100%
rename from src/fst/root.lexc
rename to src/fst/morphology/root.lexc
diff --git a/src/fst/stems/abbreviations.lexc b/src/fst/morphology/stems/abbreviations.lexc
similarity index 100%
rename from src/fst/stems/abbreviations.lexc
rename to src/fst/morphology/stems/abbreviations.lexc
diff --git a/src/fst/stems/acronyms.lexc b/src/fst/morphology/stems/acronyms.lexc
similarity index 100%
rename from src/fst/stems/acronyms.lexc
rename to src/fst/morphology/stems/acronyms.lexc
diff --git a/src/fst/stems/adjectives.lexc b/src/fst/morphology/stems/adjectives.lexc
similarity index 100%
rename from src/fst/stems/adjectives.lexc
rename to src/fst/morphology/stems/adjectives.lexc
diff --git a/src/fst/stems/adpositions.lexc b/src/fst/morphology/stems/adpositions.lexc
similarity index 100%
rename from src/fst/stems/adpositions.lexc
rename to src/fst/morphology/stems/adpositions.lexc
diff --git a/src/fst/stems/adverbs.lexc b/src/fst/morphology/stems/adverbs.lexc
similarity index 100%
rename from src/fst/stems/adverbs.lexc
rename to src/fst/morphology/stems/adverbs.lexc
diff --git a/src/fst/stems/cardinalnumerals.lexc b/src/fst/morphology/stems/cardinalnumerals.lexc
similarity index 100%
rename from src/fst/stems/cardinalnumerals.lexc
rename to src/fst/morphology/stems/cardinalnumerals.lexc
diff --git a/src/fst/stems/comparative_adjectives.lexc b/src/fst/morphology/stems/comparative_adjectives.lexc
similarity index 100%
rename from src/fst/stems/comparative_adjectives.lexc
rename to src/fst/morphology/stems/comparative_adjectives.lexc
diff --git a/src/fst/stems/conjunctions.lexc b/src/fst/morphology/stems/conjunctions.lexc
similarity index 100%
rename from src/fst/stems/conjunctions.lexc
rename to src/fst/morphology/stems/conjunctions.lexc
diff --git a/src/fst/stems/final_components.lexc b/src/fst/morphology/stems/final_components.lexc
similarity index 100%
rename from src/fst/stems/final_components.lexc
rename to src/fst/morphology/stems/final_components.lexc
diff --git a/src/fst/stems/genitive_attributes.lexc b/src/fst/morphology/stems/genitive_attributes.lexc
similarity index 100%
rename from src/fst/stems/genitive_attributes.lexc
rename to src/fst/morphology/stems/genitive_attributes.lexc
diff --git a/src/fst/stems/interjections.lexc b/src/fst/morphology/stems/interjections.lexc
similarity index 100%
rename from src/fst/stems/interjections.lexc
rename to src/fst/morphology/stems/interjections.lexc
diff --git a/src/fst/stems/noninflecting_adjectives.lexc b/src/fst/morphology/stems/noninflecting_adjectives.lexc
similarity index 100%
rename from src/fst/stems/noninflecting_adjectives.lexc
rename to src/fst/morphology/stems/noninflecting_adjectives.lexc
diff --git a/src/fst/stems/noninflecting_verbs.lexc b/src/fst/morphology/stems/noninflecting_verbs.lexc
similarity index 100%
rename from src/fst/stems/noninflecting_verbs.lexc
rename to src/fst/morphology/stems/noninflecting_verbs.lexc
diff --git a/src/fst/stems/nouns.lexc b/src/fst/morphology/stems/nouns.lexc
similarity index 100%
rename from src/fst/stems/nouns.lexc
rename to src/fst/morphology/stems/nouns.lexc
diff --git a/src/fst/stems/numbers.lexc b/src/fst/morphology/stems/numbers.lexc
similarity index 100%
rename from src/fst/stems/numbers.lexc
rename to src/fst/morphology/stems/numbers.lexc
diff --git a/src/fst/stems/ordinalnumerals.lexc b/src/fst/morphology/stems/ordinalnumerals.lexc
similarity index 100%
rename from src/fst/stems/ordinalnumerals.lexc
rename to src/fst/morphology/stems/ordinalnumerals.lexc
diff --git a/src/fst/stems/prefixes.lexc b/src/fst/morphology/stems/prefixes.lexc
similarity index 100%
rename from src/fst/stems/prefixes.lexc
rename to src/fst/morphology/stems/prefixes.lexc
diff --git a/src/fst/stems/pronouns.lexc b/src/fst/morphology/stems/pronouns.lexc
similarity index 100%
rename from src/fst/stems/pronouns.lexc
rename to src/fst/morphology/stems/pronouns.lexc
diff --git a/src/fst/stems/propernouns.lexc b/src/fst/morphology/stems/propernouns.lexc
similarity index 100%
rename from src/fst/stems/propernouns.lexc
rename to src/fst/morphology/stems/propernouns.lexc
diff --git a/src/fst/stems/superlative_adjectives.lexc b/src/fst/morphology/stems/superlative_adjectives.lexc
similarity index 100%
rename from src/fst/stems/superlative_adjectives.lexc
rename to src/fst/morphology/stems/superlative_adjectives.lexc
diff --git a/src/fst/stems/symbol_strings.lexc b/src/fst/morphology/stems/symbol_strings.lexc
similarity index 100%
rename from src/fst/stems/symbol_strings.lexc
rename to src/fst/morphology/stems/symbol_strings.lexc
diff --git a/src/fst/stems/verbs.lexc b/src/fst/morphology/stems/verbs.lexc
similarity index 100%
rename from src/fst/stems/verbs.lexc
rename to src/fst/morphology/stems/verbs.lexc
diff --git a/src/orthography/Makefile.am b/src/fst/orthography/Makefile.am
similarity index 93%
rename from src/orthography/Makefile.am
rename to src/fst/orthography/Makefile.am
index d093009f..ff46a78a 100644
--- a/src/orthography/Makefile.am
+++ b/src/fst/orthography/Makefile.am
@@ -15,8 +15,7 @@ GT_ORTHOGRAPHIC_REGEX_SRCS=\
GT_ORTHOGRAPHIC_XFSCRIPT_SRCS=\
allcaps.xfscript \
downcase-derived_proper-strings.xfscript \
- spellrelax-with-tags.xfscript \
- punctrelax.xfscript
+ spellrelax-with-tags.xfscript
# Add extra targets here:
GIELLA_ORTH_EXTRA_TARGETS=
diff --git a/src/orthography/allcaps.xfscript b/src/fst/orthography/allcaps.xfscript
similarity index 100%
rename from src/orthography/allcaps.xfscript
rename to src/fst/orthography/allcaps.xfscript
diff --git a/src/orthography/downcase-derived_proper-strings.xfscript b/src/fst/orthography/downcase-derived_proper-strings.xfscript
similarity index 100%
rename from src/orthography/downcase-derived_proper-strings.xfscript
rename to src/fst/orthography/downcase-derived_proper-strings.xfscript
diff --git a/src/orthography/inituppercase.regex b/src/fst/orthography/inituppercase.regex
similarity index 100%
rename from src/orthography/inituppercase.regex
rename to src/fst/orthography/inituppercase.regex
diff --git a/src/orthography/punctrelax.xfscript b/src/fst/orthography/punctrelax.xfscript
similarity index 100%
rename from src/orthography/punctrelax.xfscript
rename to src/fst/orthography/punctrelax.xfscript
diff --git a/src/orthography/spellrelax-mobile-keyboard.regex b/src/fst/orthography/spellrelax-mobile-keyboard.regex
similarity index 100%
rename from src/orthography/spellrelax-mobile-keyboard.regex
rename to src/fst/orthography/spellrelax-mobile-keyboard.regex
diff --git a/src/orthography/spellrelax-tags.regex b/src/fst/orthography/spellrelax-tags.regex
similarity index 100%
rename from src/orthography/spellrelax-tags.regex
rename to src/fst/orthography/spellrelax-tags.regex
diff --git a/src/orthography/spellrelax-with-tags.xfscript b/src/fst/orthography/spellrelax-with-tags.xfscript
similarity index 100%
rename from src/orthography/spellrelax-with-tags.xfscript
rename to src/fst/orthography/spellrelax-with-tags.xfscript
diff --git a/src/orthography/spellrelax.regex b/src/fst/orthography/spellrelax.regex
similarity index 100%
rename from src/orthography/spellrelax.regex
rename to src/fst/orthography/spellrelax.regex
diff --git a/src/phonetics/Makefile.am b/src/fst/phonetics/Makefile.am
similarity index 100%
rename from src/phonetics/Makefile.am
rename to src/fst/phonetics/Makefile.am
diff --git a/src/phonetics/tests/Makefile.am b/src/fst/phonetics/tests/Makefile.am
similarity index 100%
rename from src/phonetics/tests/Makefile.am
rename to src/fst/phonetics/tests/Makefile.am
diff --git a/src/phonetics/tests/run_tests.sh.in b/src/fst/phonetics/tests/run_tests.sh.in
similarity index 100%
rename from src/phonetics/tests/run_tests.sh.in
rename to src/fst/phonetics/tests/run_tests.sh.in
diff --git a/src/fst/phonetics/tests/tests/Makefile.am b/src/fst/phonetics/tests/tests/Makefile.am
new file mode 100644
index 00000000..dc17381e
--- /dev/null
+++ b/src/fst/phonetics/tests/tests/Makefile.am
@@ -0,0 +1,19 @@
+## Process this file with automake to produce Makefile.in
+## Copyright: Sámediggi/Divvun/UiT
+## Licence: GPL v3+
+
+######## Test targets: ###########
+
+if WANT_PHONETIC
+
+# List here (space separated) all yaml files to be run as part of make check:
+TESTS=
+
+# List tests that are presently (expected) failures here, ie things that should
+# be fixed *later*, but is not critical at the moment:
+XFAIL_TESTS=
+
+endif # WANT_PHONETIC
+
+#### Do NOT edit below here: ####
+include $(top_srcdir)/../giella-core/am-shared/src-phonetics-tests-dir-include.am
diff --git a/src/fst/phonetics/tests/tests/run_tests.sh.in b/src/fst/phonetics/tests/tests/run_tests.sh.in
new file mode 100644
index 00000000..baaa6f84
--- /dev/null
+++ b/src/fst/phonetics/tests/tests/run_tests.sh.in
@@ -0,0 +1,89 @@
+#!/bin/bash
+## Process this file with configure to produce the actual shell script
+## Copyright: Sámediggi/Divvun/UiT
+## Licence: GPL v3+
+
+# Test runner to test conversion to IPA.
+
+# Use autotools mechanisms to only run the configured fst types in the tests:
+fsttype=
+@CAN_HFST_TRUE@fsttype="$fsttype hfst"
+@CAN_XFST_TRUE@fsttype="$fsttype xfst"
+@CAN_FOMA_TRUE@fsttype="$fsttype foma"
+
+# Exit if all fst types have been shut off:
+if [[ "x$fsttype" == "x" ]]; then
+ echo "All transducer types have been shut off at configure time."
+ echo "Nothing to test. SKIPPED."
+ exit 77
+fi
+
+fst_num=$(echo "$fsttype" | wc -w)
+# Debug: echo Number of fst´s: $fst_num
+
+fst=$(grep -v '^#' $1 | grep -v '^\s*$' | grep 'fst' | cut -f2)
+
+# Debug:
+# echo FST: $fst
+
+grep -v '^#' $1 | grep -v '^\s*$' | tail -n +2 | cut -f1 > innput.txt
+grep -v '^#' $1 | grep -v '^\s*$' | tail -n +2 | cut -f2 > expect.txt
+
+###### Start testing: #######
+transducer_found=0
+fails=0
+
+# .---------- constant part!
+# vvvv vvvv-- colour code
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+BOLD='\033[1m'
+NC='\033[0m' # No Color
+
+
+# Loop over the transducer types first - we test both hfst, xfst and foma
+# according to the configuration:
+for f in $fsttype; do
+    # DEBUG: echo "Fst loop 35: $f"
+    if test "$f" == "xfst"; then
+        lookuptool="@LOOKUP@ -flags mbTT"
+    elif test "$f" == "foma"; then
+        lookuptool="@FLOOKUP@"
+    elif test "$f" == "hfst"; then
+        lookuptool="@HFST_LOOKUP@ -q"
+    else
+        fails=$((fails+1))
+        echo "FAIL: Unknown fst type! FST=$f"
+        continue
+    fi
+    # Run lookup, then clean the output as follows before saving:
+    # 1. remove extra empty lines
+    # 2. convert two or more newlines to XXYYZZ (two newlines are cohort separators)
+    # 3. convert single newlines to ' XXXX ' - marks multiple outputs from single input
+    # 4. convert XXYYZZ back to a single newline
+    # 5. cleanup on the last line
+    $lookuptool ../$fst.$f < innput.txt | cut -f1-2 \
+        | sed '${/^[[:space:]]*$/d;}' \
+        | perl -0pe 's/\n\n+/XXYYZZ/g;' \
+        | perl -0pe 's/\n/ XXXX /g;' \
+        | perl -pe 's/XXYYZZ/\n/g' \
+        | sed '$ s/ XXXX $/\n/' > output.${f}.txt
+
+    # The actual test: compare against the output of the fst type under test
+    diff expect.txt <(cut -f2- output.${f}.txt)
+
+    # Just to be sure, capture the output value - it might give different
+    # results for hfst, foma and xfst (that would be a bug in the offending
+    # one, but one never knows). This way the test will fail even if the last
+    # fst type being tested was successful if one of the earlier types failed.
+    #
+    # And if more than one fst type is tested, print output status for each:
+    if (($? > 0)) ; then
+        fails=$((fails+1))
+        (($fst_num > 1)) && echo -e " ${BOLD}FAILED: ${RED}$f${NC}"
+    else
+        (($fst_num > 1)) && echo -e " ${BOLD}PASSED: ${GREEN}$f${NC}"
+    fi
+done
+
+exit $fails
diff --git a/src/phonetics/txt2ipa.xfscript b/src/fst/phonetics/txt2ipa.xfscript
similarity index 100%
rename from src/phonetics/txt2ipa.xfscript
rename to src/fst/phonetics/txt2ipa.xfscript
diff --git a/src/hyphenation/Makefile.am b/src/fst/syllabification/Makefile.am
similarity index 100%
rename from src/hyphenation/Makefile.am
rename to src/fst/syllabification/Makefile.am
diff --git a/src/hyphenation/hyphenation.xfscript b/src/fst/syllabification/hyphenation.xfscript
similarity index 100%
rename from src/hyphenation/hyphenation.xfscript
rename to src/fst/syllabification/hyphenation.xfscript
diff --git a/src/tagsets/Makefile.am b/src/fst/tagsets/Makefile.am
similarity index 100%
rename from src/tagsets/Makefile.am
rename to src/fst/tagsets/Makefile.am
diff --git a/src/transcriptions/Makefile.am b/src/fst/transcriptions/Makefile.am
similarity index 100%
rename from src/transcriptions/Makefile.am
rename to src/fst/transcriptions/Makefile.am
diff --git a/src/transcriptions/transcriptor-abbrevs2text.lexc b/src/fst/transcriptions/transcriptor-abbrevs2text.lexc
similarity index 100%
rename from src/transcriptions/transcriptor-abbrevs2text.lexc
rename to src/fst/transcriptions/transcriptor-abbrevs2text.lexc
diff --git a/src/transcriptions/transcriptor-clock-digit2text.lexc b/src/fst/transcriptions/transcriptor-clock-digit2text.lexc
similarity index 100%
rename from src/transcriptions/transcriptor-clock-digit2text.lexc
rename to src/fst/transcriptions/transcriptor-clock-digit2text.lexc
diff --git a/src/transcriptions/transcriptor-date-digit2text.lexc b/src/fst/transcriptions/transcriptor-date-digit2text.lexc
similarity index 100%
rename from src/transcriptions/transcriptor-date-digit2text.lexc
rename to src/fst/transcriptions/transcriptor-date-digit2text.lexc
diff --git a/src/transcriptions/transcriptor-numbers-digit2text.lexc b/src/fst/transcriptions/transcriptor-numbers-digit2text.lexc
similarity index 100%
rename from src/transcriptions/transcriptor-numbers-digit2text.lexc
rename to src/fst/transcriptions/transcriptor-numbers-digit2text.lexc
diff --git a/test/src/morphology/generate-propernoun-lemmas.sh.in b/test/src/morphology/generate-propernoun-lemmas.sh.in
index b0b9541c..4efd8d4d 100644
--- a/test/src/morphology/generate-propernoun-lemmas.sh.in
+++ b/test/src/morphology/generate-propernoun-lemmas.sh.in
@@ -22,7 +22,7 @@
###### Variables: #######
POS=propernouns
### in ###
-source_file=${srcdir}/../../../src/fst/stems/${POS}.lexc
+source_file=${srcdir}/../../../src/fst/morphology/stems/${POS}.lexc
generator_file=./../../../src/generator-gt-norm
analyser_file=./../../../src/analyser-gt-norm
diff --git a/test/tools/spellcheckers/fstbased/desktop/hfst/accept-all-lemmas.sh.in b/test/tools/spellcheckers/fstbased/desktop/hfst/accept-all-lemmas.sh.in
index fc4ef778..0d465350 100755
--- a/test/tools/spellcheckers/fstbased/desktop/hfst/accept-all-lemmas.sh.in
+++ b/test/tools/spellcheckers/fstbased/desktop/hfst/accept-all-lemmas.sh.in
@@ -17,7 +17,7 @@
GIELLA_LANG=@GTLANG2@
ospell=@HFST_OSPELL@
### in ###
-source_files=${srcdir}/../../../../../../src/fst/stems/*.lexc
+source_files=${srcdir}/../../../../../../src/fst/morphology/stems/*.lexc
speller_dir=./../../../../../../tools/spellcheckers
### out ###
diff --git a/tools/spellcheckers/soovita.cpp.utf8.1 b/tools/spellcheckers/soovita.cpp.utf8.1
index 2240331c..dc969bc0 100644
--- a/tools/spellcheckers/soovita.cpp.utf8.1
+++ b/tools/spellcheckers/soovita.cpp.utf8.1
@@ -1,651 +1,651 @@
-/*
-* soovitaja
-* konstrueerib sona alusel stringe, mille korrektsust siis sonastikust
-* chkmin() abil kontrollitakse; kui mo~ni on korrektne so~na, siis
-* pannakse ta SugAhel -sse
-* kogu protsess ka"ib ja"rgmiselt:
-* 1. korda soovitaja poole po"o"rdudes po"o"rdutakse SugFirst() poole;
-* ja"rgmistel kordadel SugNext() poole
-* kui SugAhel == NULL, siis s.t. sobivaid soovitusi pole
-*/
-/*
-#include
-#include
-#include
-#include
-
-#include "ini_mrf.h"
-#include "soovita.h"
-#include "morf.h"
-#include "chup.h"
-#include "mrflags.h"
-
-extern MRF_FLAGS mrfFlags;
-
-char InsertCharsBeg[]="kpstvlmrahnejioü""dfubõg"; // <=0.37: äšöžz //
-char InsertChars[]="aeistlunkmodrvghjpäõ""büö"; // <= 0.12: fšžcwy //
-char EstAlphas[]="abcdefghijklmnopqrsšzžtuvwõäöüxy";
-
-struct _ChangeArray{
- char from;
- char *to;
-}CA[]={ // kujult, ling, klaviatuurilt //
- {'a', "ä" "e" "s"},
- {'b', "" "p" "nvgh"},
- {'c', "" "" "dv"},
- {'d', "" "t" "esr"},
- {'e', "" "a" "sdr"},
- {'f', "" "" "tdrvg"},
- {'g', "" "kjž" "tvhb"},
- {'h', "" "" "ungjb"},
- {'i', "" "j" "uko"},
- {'j', "" "igž" "unkmh"},
- {'k', "" "g" "ilmo"},
- {'l', "" "" "kopö"},
- {'m', "" "" "nkj"},
- {'n', "" "" "mhjb"},
- {'o', "öõ" "u" "ilkp"},
- {'p', "" "b" "loüö"},
- {'q', "" "" "a"},
- {'r', "" "" "etd"},
- {'s', "š" "z" "aed"},
- {'š', "s" "ž" ""},
- {'z', "ž" "s" "a"},
- {'ž', "z" "š" ""},
- {'t', "" "d" "rg"},
- {'u', "ü" "o" "ihj"},
- {'v', "" "" "gb"},
- {'w', "" "" "aes"},
- {'õ', "oö" "" "äü"},
- {'ä', "a" "" "õüö"},
- {'ö', "oõ" "" "lpäü"},
- {'ü', "u" "" "päõö"},
- {'x', "" "" "sd"},
- {'y', "" "ü" "tugh"},
- {'\xa2', "õöo" "" ""}, // o`, o', o^ //
- {'\x95', "õöo" "" ""},
- {'\xe4', "õöo" "" ""},
- {'\xa0', "ä""a" "" ""}, // a`, a', a^ //
- {'\x85', "ä""a" "" ""},
- {'\x83', "ä""a" "" ""},
- {'\xa3', "üu" "" ""}, // u`, u', u^ //
- {'\x97', "üu" "" ""},
- {'\x96', "üu" "" ""},
- {'\0', EstAlphas},
-};
-
-struct _ChangeArrayStr{
- char *from;
- char *to;
-}CAStr[]={
- {"sh", "š"},
- {"zh", "ž"},
- {"x", "ks"},
- {"f", "hv"},
- {"hv", "f"},
- {"ff", "hv"},
- {"mb", "mm"},
- {"ää", "ea"},
- {"g", "dž"},
- {"dž", "g"},
- {"j", "dž"},
- {"dž", "j"},
- {"data", "tada"},
- {"tada", "data"},
- {"o~", "õ"},
- {"a\"", "ä"},
- {"o\"", "ö"},
- {"u\"", "ü"},
- {"s^", "š"},
- {"z^", "ž"},
- {0, 0}
-};
-
-struct _ChangeArrayStr CAEStr[]={
- {"si", "seid"},
- {"si", "sid"},
- {"seid", "si"},
- {"seid", "sid"},
- {"sid", "si"},
- {"sid", "seid"},
- {0, 0}
-};
-
-char //roomanr,// tasand, tagasitasand;
-
-SUG *SugAhel=0; // soovituste ahel //
-
-// --------------------------------------------------------- //
-//#include //
-
-//#if defined (SPELLER)
-char IsInUDR(char *sisse);
-char IsInExcUDR(char *sisse);
-char *IsInChangeUDR(char *word);
-//#endif
-
-int SugGroupLevel(int Level){
- switch (Level){
- case 0:
- case 1:
- case 2:
- case 3:
- return 0;
- case 4:
- case 5:
- return 1;
- case 6:
- return 2;
- default:
- return 3;
- }
-}
-
-int SugUnGroupLevel(int Level){
- switch (Level){
- case 0:
- return 3;
- case 1:
- return 5;
- case 2:
- return 6;
- default:
- return 100;
- }
-}
-
-int SugSpell(char *word){ // =0 kui word on oige sona //
- int nr;
- char outstr2[STEMLEN*4];
- int maxtasand, tagasi;
-
- maxtasand = tasand;
- if (strlen(word)<=1){
- outstr2[0]=0;
- }
- else{
- chkmin(word, 0, outstr2, &nr, maxtasand, &tagasi);
- tagasitasand = tagasi;
- }
- if (outstr2[0])
- {
-//#if defined(SPELLER)
- if(mrfFlags.Chk(MF_SPELL) && !IsInExcUDR(word))
-//#endif
- return 0;
- }
-//#if defined(SPELLER)
- if(mrfFlags.Chk(MF_SPELL))
- {
- tagasitasand=0;
- if (IsInUDR(word)) return 0;
- }
-//#endif
- return 1;
-}
-
-void SugAdd(char *word){
- SUG *s, *s1;
- char tasanduus;
- s=(SUG *)malloc(sizeof(SUG));
- strcpy(s->tyvi, word);
- s->sug_next=0;
- s->tasand=tagasitasand;
- if (!SugAhel){
- SugAhel=s;
- s->sug_prev=0;
- }
- else{
- for (s1=SugAhel; s1->sug_next; s1=s1->sug_next);
- s1->sug_next=s;
- s->sug_prev=s1;
- }
- tasanduus=SugUnGroupLevel(SugGroupLevel(tagasitasand));
- if (tasanduustasand1) tasand1=tagasitasand;
-// ch[0]=ChToLower(ch[0]);
-
- if (!SugSpell(word)){
- if (tagasitasand>tasand1) tasand1=tagasitasand;
- }
- else{
- word[0]=ChToUpper(word[0]);
- }
-
-// if (!SugSpell(ch)){
- if (tagasitasand>tasand1) tasand1=tagasitasand;
- }
- else{
- ch[0]=ChToUpper(ch[0]);
- }//
- ch--;
- (*ch)=' ';
- tagasitasand=tasand1;
- SugAdd(word);
- return 1;
-}
-
-void SugFree(void){
- SUG *s;
- for (s=SugAhel; s; ){
- SugAhel=SugAhel->sug_next;
- free(s);
- s=SugAhel;
- }
-}
-
-int SugCapitalize(char *word){
- char *ch;
- int i;
- SUG *SA;
- if (!ChIsUpper(*word)){
- return 0;
- }
- for (SA=SugAhel; SA; SA=SA->sug_next){
- SA->tyvi[0]=ChToUpper(SA->tyvi[0]);
- }
-
- i=1;
- for (ch=word; *ch; ch++){
- i&=ChIsUpperUP(*ch);
- }
- if (!i){
- return 0;
- }
- for (SA=SugAhel; SA; SA=SA->sug_next){
- for (ch=SA->tyvi; *ch; ch++){
- (*ch)=ChToUpper(*ch);
- }
- }
- return 0;
-}
-
-int SugSimilar(char *word1, char *word2){
- int pos;
- pos=strlen(word1);
- if (abs(pos-strlen(word2))>1) return 0;
- if (memcmp(word1, word2, pos)){
- return 0;
- }
- if (word2[pos]){
- if (strchr(EstAlphas, word2[pos])){
- return 0;
- }
- }
- return 1;
-}
-
-int SugRemoveCopy(){
- SUG *SA, *SA1, *SA2;
- for (SA=SugAhel; SA; SA=SA->sug_next){
- for (SA2=SugAhel; SA2; ){
- if (SA==SA2){
- SA2=SA2->sug_next;
- continue;
- }
- if (SugSimilar(SA->tyvi, SA2->tyvi)){
- if (SA2->sug_prev) SA2->sug_prev->sug_next=SA2->sug_next;
- if (SA2->sug_next) SA2->sug_next->sug_prev=SA2->sug_prev;
- SA1=SA2;
- SA2=SA2->sug_next;
- if (SA1==SugAhel){
- SugAhel=SA2;
- }
- free(SA1);
- }
- else{
- SA2=SA2->sug_next;
- }
- }
- }
- return 0;
-}
-
-int SugRemoveImmoderate(){
- SUG *SA, *SB;
- int Useful;
- if (!SugAhel) return 0;
- Useful=SugGroupLevel(SugAhel->tasand);
- for (SA=SugAhel; SA; SA=SA->sug_next){
- if (SugGroupLevel(SA->tasand)!=Useful){
- SA->sug_prev->sug_next=0;
- SB=SugAhel;
- SugAhel=SA;
- SugFree();
- SugAhel=SB;
- return 0;
- }
- }
- return 0;
-}
-
-int SugOrder(){
- SUG* SA, *SA1, *SA2;
- for (SA=SugAhel; SA; SA=SA->sug_next){
- SA1=SA;
- for (SA2=SA->sug_next; SA2; SA2=SA2->sug_next){
- if (SA2->tasandtasand){
- SA1=SA2;
- }
- }
- if (SA1!=SA){
- if (SA1->sug_prev) SA1->sug_prev->sug_next=SA1->sug_next;
- if (SA1->sug_next) SA1->sug_next->sug_prev=SA1->sug_prev;
- SA1->sug_prev=SA->sug_prev;
- if (SA->sug_prev) SA->sug_prev->sug_next=SA1;
- SA1->sug_next=SA;
- SA->sug_prev=SA1;
- SA=SA1;
- if (!SA->sug_prev) SugAhel=SA;
- }
- }
- return 0;
-}
-
-int SugNext(const char *word, int len){
- SugFree();
- return 0;
-}
-
-int SugFirst(const char *word, int len, int sygavus, int *reserveeritud_tulelvuku_tarbeks){
- int i, j, len2;
- char *ch;
- char *wordlow, *testword;
- char c, tasand1;
- struct _ChangeArray *CA1;
- struct _ChangeArrayStr *CAStr1;
-// char roomanr1=roomanr;
-// roomanr=0;
-
- SugFree();
-
- wordlow=(char *)malloc(len+1);
- testword=(char *)malloc(len+20);
- for (i=0; i<=len; i++){
- wordlow[i]=ChToLower(word[i]);
- }
-
-//#if defined(SPELLER)
- if(mrfFlags.Chk(MF_SPELL))
- {
- tagasitasand=0;
- ch=IsInChangeUDR((char *)word);
- if (ch){
- SugAdd(ch);
- }
- }
-//#endif
-// SUG_CASE: //
- tasand1=sygavus;
- tasand=100;
- SugCheck(wordlow);
- tasand=tasand1;
-
-// SUG_ABBR: //
- strcpy(testword, wordlow);
- strcat(testword, "-"); // sona- //
- SugCheck(testword);
-
- ch=strchr(word, '-');
- if ((ch) && (ch!=word)){
- if ((ChIsUpper(*(ch-1))) && (!ChIsUpperUP(*(ch+1)))){
- strcpy(testword, word); // BNS- //
- testword[ch-word+1] = 'i'; // BNS-i //
- strcpy(testword+(ch-word)+2, ch+1); // BNS-ile //
- tasand1=tasand;
- tasand=100; // Natuke sygavamalt //
- SugCheck(testword);
- tasand=tasand1;
- }
- }
- else if (ChIsUpper(*word)){
- ch=(char *)word;
- for (i=1; i; ch++, i=ChIsUpper(*ch));
- if (!ChIsUpperUP(*ch)){
- strcpy(testword, word); // BNS //
- testword[ch-word] = 'i'; // BNSi //
- strcpy(testword+(ch-word)+1, ch); // BNSile //
- tasand1=tasand;
- tasand=100; // Natuke sygavamalt //
- SugCheck(testword);
- tasand=tasand1;
- }
- }
-
-// SUG_ADDSPACE: Renel oli wordlow word asemel HJK 12.05.98 //
- for (i=1; ifrom; CAStr1++){
- if (memcmp(wordlow+i, CAStr1->from, strlen(CAStr1->from))==0){
- memcpy(testword, wordlow, i);
- strcpy(testword+i, CAStr1->to);
- strcat(testword, wordlow+i+strlen(CAStr1->from));
- SugCheck(testword);
- }
- }
- }
-
-// SUG_CHANGEENDBLOCKS: //
- for (CAStr1=CAEStr; CAStr1->from; CAStr1++){
- len2=strlen(CAStr1->from);
- if (len2<=len){
- if (strcmp(wordlow+len-len2, CAStr1->from)==0){
- memcpy(testword, wordlow, len-len2);
- strcpy(testword+len-len2, CAStr1->to);
- SugCheck(testword);
- }
- }
- }
-
-// gi* -> *gi //
- for (i=3; i<=5; i++){
- if (i>len) break;
- if (memcmp(wordlow+len-i, "gi", 2)==0){
- memcpy(testword, wordlow, len-i);
- strcpy(testword+len-i, wordlow+len-i+2);
- strcpy(testword+len-2, "gi");
- SugCheck(testword);
- strcpy(testword+len-2, "ki");
- SugCheck(testword);
- }
- }
-
-// SUG_DELLETTERS: //
- if (len>1){
- for (i=0; ifrom)||(!CA1->from)){
- for (ch=CA1->to; *ch; ch++){
- testword[i]=*ch;
- SugCheck(testword);
- }
- break;
- }
- }
- }
-
-// SUG_INSERTLETTERS: //
- for (ch=InsertChars; *ch; ch++){
- for (i=1; i
+#include
+#include
+#include
+
+#include "ini_mrf.h"
+#include "soovita.h"
+#include "morf.h"
+#include "chup.h"
+#include "mrflags.h"
+
+extern MRF_FLAGS mrfFlags;
+
+char InsertCharsBeg[]="kpstvlmrahnejioü""dfubõg"; // insertion candidates; no use is visible in this part of the file (the name suggests the word-initial position — TODO confirm). Original note: <=0.37: äšöžz //
+char InsertChars[]="aeistlunkmodrvghjpäõ""büö"; // letters iterated by SugFirst in its SUG_INSERTLETTERS step. Original note: <= 0.12: fšžcwy //
+char EstAlphas[]="abcdefghijklmnopqrsšzžtuvwõäöüxy"; // letter set: SugSimilar tests membership with strchr, and the last CA entry uses it as its replacement list //
+
+struct _ChangeArray{ // single-character substitution table: for a character "from", the characters worth trying in its place //
+ char from; // character to be replaced //
+ char *to; // replacement candidates; SugFirst writes them one by one into testword[i] and calls SugCheck //
+}CA[]={ // columns of each "to" list: by shape (look-alike), linguistic, keyboard neighbours //
+ {'a', "ä" "e" "s"},
+ {'b', "" "p" "nvgh"},
+ {'c', "" "" "dv"},
+ {'d', "" "t" "esr"},
+ {'e', "" "a" "sdr"},
+ {'f', "" "" "tdrvg"},
+ {'g', "" "kjž" "tvhb"},
+ {'h', "" "" "ungjb"},
+ {'i', "" "j" "uko"},
+ {'j', "" "igž" "unkmh"},
+ {'k', "" "g" "ilmo"},
+ {'l', "" "" "kopö"},
+ {'m', "" "" "nkj"},
+ {'n', "" "" "mhjb"},
+ {'o', "öõ" "u" "ilkp"},
+ {'p', "" "b" "loüö"},
+ {'q', "" "" "a"},
+ {'r', "" "" "etd"},
+ {'s', "š" "z" "aed"},
+ {'š', "s" "ž" ""},
+ {'z', "ž" "s" "a"},
+ {'ž', "z" "š" ""},
+ {'t', "" "d" "rg"},
+ {'u', "ü" "o" "ihj"},
+ {'v', "" "" "gb"},
+ {'w', "" "" "aes"},
+ {'õ', "oö" "" "äü"},
+ {'ä', "a" "" "õüö"},
+ {'ö', "oõ" "" "lpäü"},
+ {'ü', "u" "" "päõö"},
+ {'x', "" "" "sd"},
+ {'y', "" "ü" "tugh"},
+ {'\xa2', "õöo" "" ""}, // o`, o', o^ //
+ {'\x95', "õöo" "" ""},
+ {'\xe4', "õöo" "" ""},
+ {'\xa0', "ä""a" "" ""}, // a`, a', a^ //
+ {'\x85', "ä""a" "" ""},
+ {'\x83', "ä""a" "" ""},
+ {'\xa3', "üu" "" ""}, // u`, u', u^ //
+ {'\x97', "üu" "" ""},
+ {'\x96', "üu" "" ""},
+ {'\0', EstAlphas}, // last entry, from==0; SugFirst also tests !CA1->from, so this appears to be the catch-all that tries every letter of EstAlphas — confirm //
+};
+
+struct _ChangeArrayStr{ // substring substitution pair //
+ char *from; // text to look for //
+ char *to; // text spliced in instead of "from" //
+}CAStr[]={ // presumably the table walked by SugFirst's memcmp(wordlow+i, from, ...) loop; the loop header is not legible in this copy — confirm //
+ {"sh", "š"},
+ {"zh", "ž"},
+ {"x", "ks"},
+ {"f", "hv"},
+ {"hv", "f"},
+ {"ff", "hv"},
+ {"mb", "mm"},
+ {"ää", "ea"},
+ {"g", "dž"},
+ {"dž", "g"},
+ {"j", "dž"},
+ {"dž", "j"},
+ {"data", "tada"},
+ {"tada", "data"},
+ {"o~", "õ"},
+ {"a\"", "ä"},
+ {"o\"", "ö"},
+ {"u\"", "ü"},
+ {"s^", "š"},
+ {"z^", "ž"},
+ {0, 0} // terminator: from==0 //
+};
+
+struct _ChangeArrayStr CAEStr[]={ // word-ending substitutions: SugFirst compares "from" with the tail of the lowercased word (strcmp) and, on a match, replaces that tail with "to" //
+ {"si", "seid"},
+ {"si", "sid"},
+ {"seid", "si"},
+ {"seid", "sid"},
+ {"sid", "si"},
+ {"sid", "seid"},
+ {0, 0} // terminator: the SugFirst loop stops when from==0 //
+};
+
+char tasand, tagasitasand; // tasand: depth that SugSpell hands to chkmin; tagasitasand: level reported back by chkmin and stored in each suggestion by SugAdd (the disabled roomanr variable is dropped so the declaration is no longer swallowed by the comment) //
+
+SUG *SugAhel=0; // head of the suggestion chain //
+
+// --------------------------------------------------------- //
+//#include //
+
+//#if defined (SPELLER)
+char IsInUDR(char *sisse); // defined elsewhere; a non-zero result makes SugSpell accept the word (return 0) //
+char IsInExcUDR(char *sisse); // defined elsewhere; a non-zero result stops SugSpell from accepting a word for which chkmin produced output //
+char *IsInChangeUDR(char *word); // defined elsewhere; a non-NULL result is added by SugFirst as a suggestion //
+//#endif
+
+int SugGroupLevel(int Level){ // Collapses a level into one of four groups: 0..3 -> 0, 4..5 -> 1, 6 -> 2, anything else -> 3 //
+ switch (Level){
+ case 0:
+ case 1:
+ case 2:
+ case 3:
+ return 0;
+ case 4:
+ case 5:
+ return 1;
+ case 6:
+ return 2;
+ default:
+ return 3; // also reached for negative values //
+ }
+}
+
+int SugUnGroupLevel(int Level){ // Maps a group number produced by SugGroupLevel to the highest level of that group: 0 -> 3, 1 -> 5, 2 -> 6 //
+ switch (Level){
+ case 0:
+ return 3;
+ case 1:
+ return 5;
+ case 2:
+ return 6;
+ default:
+ return 100; // every other value, including group 3 //
+ }
+}
+
+int SugSpell(char *word){ // Returns 0 when word is accepted as a correct word, 1 otherwise //
+ int nr;
+ char outstr2[STEMLEN*4];
+ int maxtasand, tagasi;
+
+ maxtasand = tasand; // depth limit comes from the file-scope tasand //
+ if (strlen(word)<=1){ // words of at most one character are not passed to chkmin //
+ outstr2[0]=0;
+ }
+ else{
+ chkmin(word, 0, outstr2, &nr, maxtasand, &tagasi); // chkmin is defined elsewhere; presumably fills outstr2 and tagasi — TODO confirm //
+ tagasitasand = tagasi; // publish the level reported back //
+ }
+ if (outstr2[0]) // non-empty output //
+ {
+//#if defined(SPELLER)
+ if(mrfFlags.Chk(MF_SPELL) && !IsInExcUDR(word)) // NOTE(review): when MF_SPELL is clear this test fails and the block below is skipped too, so the function ends at "return 1" even for a word with output — confirm this is intended //
+//#endif
+ return 0;
+ }
+//#if defined(SPELLER)
+ if(mrfFlags.Chk(MF_SPELL))
+ {
+ tagasitasand=0; // reset before the IsInUDR lookup //
+ if (IsInUDR(word)) return 0;
+ }
+//#endif
+ return 1; // not accepted //
+}
+
+void SugAdd(char *word){
+ SUG *s, *s1;
+ char tasanduus;
+ s=(SUG *)malloc(sizeof(SUG));
+ strcpy(s->tyvi, word);
+ s->sug_next=0;
+ s->tasand=tagasitasand;
+ if (!SugAhel){
+ SugAhel=s;
+ s->sug_prev=0;
+ }
+ else{
+ for (s1=SugAhel; s1->sug_next; s1=s1->sug_next);
+ s1->sug_next=s;
+ s->sug_prev=s1;
+ }
+ tasanduus=SugUnGroupLevel(SugGroupLevel(tagasitasand));
+ if (tasanduustasand1) tasand1=tagasitasand;
+// ch[0]=ChToLower(ch[0]);
+
+ if (!SugSpell(word)){
+ if (tagasitasand>tasand1) tasand1=tagasitasand;
+ }
+ else{
+ word[0]=ChToUpper(word[0]);
+ }
+
+// if (!SugSpell(ch)){
+ if (tagasitasand>tasand1) tasand1=tagasitasand;
+ }
+ else{
+ ch[0]=ChToUpper(ch[0]);
+ }//
+ ch--;
+ (*ch)=' ';
+ tagasitasand=tasand1;
+ SugAdd(word);
+ return 1;
+}
+
+void SugFree(void){ // Frees every node of the chain headed by SugAhel; SugAhel is 0 afterwards //
+ SUG *s;
+ for (s=SugAhel; s; ){
+ SugAhel=SugAhel->sug_next; // advance the head before releasing the node //
+ free(s);
+ s=SugAhel;
+ }
+}
+
+int SugCapitalize(char *word){ // Copies the capitalisation of word onto the suggestions: first letter only, or every letter when all of word passes ChIsUpperUP; always returns 0 //
+ char *ch;
+ int i;
+ SUG *SA;
+ if (!ChIsUpper(*word)){ // word does not start with an upper-case character: suggestions stay as they are //
+ return 0;
+ }
+ for (SA=SugAhel; SA; SA=SA->sug_next){
+ SA->tyvi[0]=ChToUpper(SA->tyvi[0]); // upper-case the first character of each suggestion //
+ }
+
+ i=1;
+ for (ch=word; *ch; ch++){
+ i&=ChIsUpperUP(*ch); // bitwise AND: i stays non-zero only while every result has bit 0 set — assumes ChIsUpperUP returns 0 or 1, confirm //
+ }
+ if (!i){ // at least one character failed ChIsUpperUP //
+ return 0;
+ }
+ for (SA=SugAhel; SA; SA=SA->sug_next){
+ for (ch=SA->tyvi; *ch; ch++){
+ (*ch)=ChToUpper(*ch); // upper-case the whole suggestion //
+ }
+ }
+ return 0;
+}
+
+int SugSimilar(char *word1, char *word2){ // Returns 1 when word2 equals word1 or is word1 plus exactly one trailing character that is not in EstAlphas; otherwise 0 //
+ int pos;
+ pos=(int)strlen(word1);
+ if (abs(pos-(int)strlen(word2))>1) return 0; // lengths are compared as int: int minus size_t is unsigned, which makes abs() an ambiguous overload in C++ and wraps when word2 is the longer one //
+ if (memcmp(word1, word2, pos)){ // reads at most strlen(word2)+1 bytes of word2, because the lengths differ by at most one here //
+ return 0;
+ }
+ if (word2[pos]){ // word2 has one extra character after the common part //
+ if (strchr(EstAlphas, word2[pos])){ // the extra character is a letter from EstAlphas: not similar //
+ return 0;
+ }
+ }
+ return 1;
+}
+
+int SugRemoveCopy(){ // Drops near-duplicate suggestions: for each node SA, every other node SA2 with SugSimilar(SA->tyvi, SA2->tyvi) is unlinked and freed; always returns 0 //
+ SUG *SA, *SA1, *SA2;
+ for (SA=SugAhel; SA; SA=SA->sug_next){
+ for (SA2=SugAhel; SA2; ){ // the inner scan restarts from the head, so nodes before SA are checked too //
+ if (SA==SA2){ // never compare a node with itself //
+ SA2=SA2->sug_next;
+ continue;
+ }
+ if (SugSimilar(SA->tyvi, SA2->tyvi)){
+ if (SA2->sug_prev) SA2->sug_prev->sug_next=SA2->sug_next; // unlink SA2 from both neighbours //
+ if (SA2->sug_next) SA2->sug_next->sug_prev=SA2->sug_prev;
+ SA1=SA2; // remember the node to free //
+ SA2=SA2->sug_next; // step forward before the node is released //
+ if (SA1==SugAhel){ // the head itself was removed //
+ SugAhel=SA2;
+ }
+ free(SA1);
+ }
+ else{
+ SA2=SA2->sug_next;
+ }
+ }
+ }
+ return 0;
+}
+
+int SugRemoveImmoderate(){ // Keeps the leading run of suggestions whose group level (SugGroupLevel of tasand) equals the head's and frees everything from the first mismatch on; always returns 0 //
+ SUG *SA, *SB;
+ int Useful;
+ if (!SugAhel) return 0; // empty chain: nothing to trim //
+ Useful=SugGroupLevel(SugAhel->tasand); // group level of the first suggestion //
+ for (SA=SugAhel; SA; SA=SA->sug_next){
+ if (SugGroupLevel(SA->tasand)!=Useful){
+ SA->sug_prev->sug_next=0; // cut the chain before SA; SA cannot be the head here because the head always matches its own group //
+ SB=SugAhel; // save the real head //
+ SugAhel=SA; // point the head at the tail so that SugFree releases only the tail //
+ SugFree();
+ SugAhel=SB; // restore the kept part //
+ return 0;
+ }
+ }
+ return 0;
+}
+
+int SugOrder(){ // Reorders the chain by tasand: for each position SA, a node SA1 is chosen from SA and its successors and relinked directly in front of SA; always returns 0 //
+ SUG* SA, *SA1, *SA2;
+ for (SA=SugAhel; SA; SA=SA->sug_next){
+ SA1=SA;
+ for (SA2=SA->sug_next; SA2; SA2=SA2->sug_next){
+ if (SA2->tasandtasand){ // NOTE(review): this condition looks truncated in this copy (presumably SA2->tasand < SA1->tasand) — confirm against the real file //
+ SA1=SA2;
+ }
+ }
+ if (SA1!=SA){
+ if (SA1->sug_prev) SA1->sug_prev->sug_next=SA1->sug_next; // detach SA1 from its current place //
+ if (SA1->sug_next) SA1->sug_next->sug_prev=SA1->sug_prev;
+ SA1->sug_prev=SA->sug_prev; // splice SA1 in just before SA //
+ if (SA->sug_prev) SA->sug_prev->sug_next=SA1;
+ SA1->sug_next=SA;
+ SA->sug_prev=SA1;
+ SA=SA1; // continue from the moved node, so the next iteration revisits the old SA //
+ if (!SA->sug_prev) SugAhel=SA; // the moved node became the new head //
+ }
+ }
+ return 0;
+}
+
+int SugNext(const char *word, int len){ // Frees the current suggestions and returns 0; word and len are not used //
+ SugFree();
+ return 0;
+}
+
+int SugFirst(const char *word, int len, int sygavus, int *reserveeritud_tulelvuku_tarbeks){
+ int i, j, len2;
+ char *ch;
+ char *wordlow, *testword;
+ char c, tasand1;
+ struct _ChangeArray *CA1;
+ struct _ChangeArrayStr *CAStr1;
+// char roomanr1=roomanr;
+// roomanr=0;
+
+ SugFree();
+
+ wordlow=(char *)malloc(len+1);
+ testword=(char *)malloc(len+20);
+ for (i=0; i<=len; i++){
+ wordlow[i]=ChToLower(word[i]);
+ }
+
+//#if defined(SPELLER)
+ if(mrfFlags.Chk(MF_SPELL))
+ {
+ tagasitasand=0;
+ ch=IsInChangeUDR((char *)word);
+ if (ch){
+ SugAdd(ch);
+ }
+ }
+//#endif
+// SUG_CASE: //
+ tasand1=sygavus;
+ tasand=100;
+ SugCheck(wordlow);
+ tasand=tasand1;
+
+// SUG_ABBR: //
+ strcpy(testword, wordlow);
+ strcat(testword, "-"); // sona- //
+ SugCheck(testword);
+
+ ch=strchr(word, '-');
+ if ((ch) && (ch!=word)){
+ if ((ChIsUpper(*(ch-1))) && (!ChIsUpperUP(*(ch+1)))){
+ strcpy(testword, word); // BNS- //
+ testword[ch-word+1] = 'i'; // BNS-i //
+ strcpy(testword+(ch-word)+2, ch+1); // BNS-ile //
+ tasand1=tasand;
+ tasand=100; // Natuke sygavamalt //
+ SugCheck(testword);
+ tasand=tasand1;
+ }
+ }
+ else if (ChIsUpper(*word)){
+ ch=(char *)word;
+ for (i=1; i; ch++, i=ChIsUpper(*ch));
+ if (!ChIsUpperUP(*ch)){
+ strcpy(testword, word); // BNS //
+ testword[ch-word] = 'i'; // BNSi //
+ strcpy(testword+(ch-word)+1, ch); // BNSile //
+ tasand1=tasand;
+ tasand=100; // Natuke sygavamalt //
+ SugCheck(testword);
+ tasand=tasand1;
+ }
+ }
+
+// SUG_ADDSPACE: Renel oli wordlow word asemel HJK 12.05.98 //
+ for (i=1; ifrom; CAStr1++){
+ if (memcmp(wordlow+i, CAStr1->from, strlen(CAStr1->from))==0){
+ memcpy(testword, wordlow, i);
+ strcpy(testword+i, CAStr1->to);
+ strcat(testword, wordlow+i+strlen(CAStr1->from));
+ SugCheck(testword);
+ }
+ }
+ }
+
+// SUG_CHANGEENDBLOCKS: //
+ for (CAStr1=CAEStr; CAStr1->from; CAStr1++){
+ len2=strlen(CAStr1->from);
+ if (len2<=len){
+ if (strcmp(wordlow+len-len2, CAStr1->from)==0){
+ memcpy(testword, wordlow, len-len2);
+ strcpy(testword+len-len2, CAStr1->to);
+ SugCheck(testword);
+ }
+ }
+ }
+
+// gi* -> *gi //
+ for (i=3; i<=5; i++){
+ if (i>len) break;
+ if (memcmp(wordlow+len-i, "gi", 2)==0){
+ memcpy(testword, wordlow, len-i);
+ strcpy(testword+len-i, wordlow+len-i+2);
+ strcpy(testword+len-2, "gi");
+ SugCheck(testword);
+ strcpy(testword+len-2, "ki");
+ SugCheck(testword);
+ }
+ }
+
+// SUG_DELLETTERS: //
+ if (len>1){
+ for (i=0; ifrom)||(!CA1->from)){
+ for (ch=CA1->to; *ch; ch++){
+ testword[i]=*ch;
+ SugCheck(testword);
+ }
+ break;
+ }
+ }
+ }
+
+// SUG_INSERTLETTERS: //
+ for (ch=InsertChars; *ch; ch++){
+ for (i=1; i