diff --git a/lttoolbox/compiler.cc b/lttoolbox/compiler.cc
index 0089f288..e4dbce89 100644
--- a/lttoolbox/compiler.cc
+++ b/lttoolbox/compiler.cc
@@ -58,6 +58,7 @@ wstring const Compiler::COMPILER_GROUP_ELEM = L"g";
wstring const Compiler::COMPILER_LEMMA_ATTR = L"lm";
wstring const Compiler::COMPILER_IGNORE_ATTR = L"i";
wstring const Compiler::COMPILER_IGNORE_YES_VAL = L"yes";
+wstring const Compiler::COMPILER_REGEX_ATTR = L"regex";
wstring const Compiler::COMPILER_ALT_ATTR = L"alt";
wstring const Compiler::COMPILER_V_ATTR = L"v";
wstring const Compiler::COMPILER_VL_ATTR = L"vl";
@@ -127,6 +128,9 @@ Compiler::parse(string const &file, wstring const &dir)
// Minimize transducers
for(auto& it : sections)
{
+ if (it.first.size() > 6 && it.first.substr(it.first.size()-6) == L"@regex") {
+ continue;
+ }
it.second.minimize();
}
@@ -208,13 +212,20 @@ Compiler::procParDef()
if(type != XML_READER_TYPE_END_ELEMENT)
{
current_paradigm = attrib(COMPILER_N_ATTR);
+ current_minimise = true;
+ if (attrib(COMPILER_REGEX_ATTR) == L"yes") {
+ current_minimise = false;
+ }
}
else
{
if(!paradigms[current_paradigm].isEmpty())
{
- paradigms[current_paradigm].minimize();
- paradigms[current_paradigm].joinFinals();
+ if (current_minimise) {
+ paradigms[current_paradigm].minimize();
+ } else {
+ paradigms[current_paradigm].joinFinals();
+ }
current_paradigm = L"";
}
}
diff --git a/lttoolbox/compiler.h b/lttoolbox/compiler.h
index 1d70c235..b7f62aad 100644
--- a/lttoolbox/compiler.h
+++ b/lttoolbox/compiler.h
@@ -81,6 +81,11 @@ class Compiler
*/
wstring current_section;
+ /**
+ * Whether the current section or paradigm should be minimised
+ */
+ bool current_minimise;
+
/**
* The direction of the compilation, 'lr' (left-to-right) or 'rl'
* (right-to-left)
@@ -325,6 +330,7 @@ class Compiler
LTTOOLBOX_IMPORTS static wstring const COMPILER_LEMMA_ATTR;
LTTOOLBOX_IMPORTS static wstring const COMPILER_IGNORE_ATTR;
LTTOOLBOX_IMPORTS static wstring const COMPILER_IGNORE_YES_VAL;
+ LTTOOLBOX_IMPORTS static wstring const COMPILER_REGEX_ATTR;
LTTOOLBOX_IMPORTS static wstring const COMPILER_ALT_ATTR;
LTTOOLBOX_IMPORTS static wstring const COMPILER_V_ATTR;
LTTOOLBOX_IMPORTS static wstring const COMPILER_VL_ATTR;
diff --git a/lttoolbox/dix.dtd b/lttoolbox/dix.dtd
index 3eb94236..ff56034e 100644
--- a/lttoolbox/dix.dtd
+++ b/lttoolbox/dix.dtd
@@ -44,6 +44,7 @@
diff --git a/lttoolbox/dix.rnc b/lttoolbox/dix.rnc
index b60ecb7d..a5984dfc 100644
--- a/lttoolbox/dix.rnc
+++ b/lttoolbox/dix.rnc
@@ -38,7 +38,7 @@ attlist.pardefs &= empty
# paradigm definition section
pardef = element pardef { attlist.pardef, e+ }
# paradigm definition
-attlist.pardef &= attribute n { text }
+attlist.pardef &= attribute n { text }, attribute regex { "no" | "yes" }?
# n: paradigm name
attlist.pardef &= attribute c { text }?
# c: comment about paradigm
@@ -47,7 +47,7 @@ section = element section { attlist.section, e+ }
attlist.section &=
attribute id { xsd:ID },
attribute type {
- "standard" | "inconditional" | "postblank" | "preblank"
+ "standard" | "inconditional" | "postblank" | "preblank" | "regex"
}
# id: dictionary section identifier
diff --git a/lttoolbox/dix.rng b/lttoolbox/dix.rng
index e651ca61..6deca105 100644
--- a/lttoolbox/dix.rng
+++ b/lttoolbox/dix.rng
@@ -103,6 +103,14 @@
+
+
+
+ no
+ yes
+
+
+
@@ -130,6 +138,7 @@
inconditional
postblank
preblank
+ regex
diff --git a/lttoolbox/fst_processor.cc b/lttoolbox/fst_processor.cc
index 6a4fb96e..12526186 100644
--- a/lttoolbox/fst_processor.cc
+++ b/lttoolbox/fst_processor.cc
@@ -881,7 +881,8 @@ FSTProcessor::classifyFinals()
inconditional.insert(it->second.getFinals().begin(),
it->second.getFinals().end());
}
- else if(endsWith(it->first, L"@standard"))
+ else if(endsWith(it->first, L"@standard") ||
+ endsWith(it->first, L"@regex"))
{
standard.insert(it->second.getFinals().begin(),
it->second.getFinals().end());
diff --git a/lttoolbox/transducer.cc b/lttoolbox/transducer.cc
index 5aa8d749..fc6fe8c4 100644
--- a/lttoolbox/transducer.cc
+++ b/lttoolbox/transducer.cc
@@ -319,6 +319,11 @@ Transducer::determinize(int const epsilon_tag)
int t = 0;
+ set finals_state;
+ for(auto& it2 : finals) {
+ finals_state.insert(it2.first);
+ }
+
while(size_Q_prime != Q_prime.size())
{
size_Q_prime = Q_prime.size();
@@ -326,11 +331,6 @@ Transducer::determinize(int const epsilon_tag)
for(auto& it : R[t])
{
- set finals_state;
- for(auto& it2 : finals)
- {
- finals_state.insert(it2.first);
- }
if(!isEmptyIntersection(Q_prime[it], finals_state))
{
double w = default_weight;
@@ -378,8 +378,8 @@ Transducer::determinize(int const epsilon_tag)
t = (t+1)%2;
}
- transitions = transitions_prime;
- finals = finals_prime;
+ transitions.swap(transitions_prime);
+ finals.swap(finals_prime);
initial = initial_prime;
}