Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

make lt-comp go a bit faster #114

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions lttoolbox/compiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ wstring const Compiler::COMPILER_GROUP_ELEM = L"g";
wstring const Compiler::COMPILER_LEMMA_ATTR = L"lm";
wstring const Compiler::COMPILER_IGNORE_ATTR = L"i";
wstring const Compiler::COMPILER_IGNORE_YES_VAL = L"yes";
wstring const Compiler::COMPILER_REGEX_ATTR = L"regex";
wstring const Compiler::COMPILER_ALT_ATTR = L"alt";
wstring const Compiler::COMPILER_V_ATTR = L"v";
wstring const Compiler::COMPILER_VL_ATTR = L"vl";
Expand Down Expand Up @@ -127,6 +128,9 @@ Compiler::parse(string const &file, wstring const &dir)
// Minimize transducers
for(auto& it : sections)
{
if (it.first.size() > 6 && it.first.substr(it.first.size()-6) == L"@regex") {
continue;
}
it.second.minimize();
}

Expand Down Expand Up @@ -208,13 +212,20 @@ Compiler::procParDef()
if(type != XML_READER_TYPE_END_ELEMENT)
{
current_paradigm = attrib(COMPILER_N_ATTR);
current_minimise = true;
if (attrib(COMPILER_REGEX_ATTR) == L"yes") {
current_minimise = false;
}
}
else
{
if(!paradigms[current_paradigm].isEmpty())
{
paradigms[current_paradigm].minimize();
paradigms[current_paradigm].joinFinals();
if (current_minimise) {
paradigms[current_paradigm].minimize();
} else {
paradigms[current_paradigm].joinFinals();
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't this be `if(current_minimise) minimize(); joinFinals();`, i.e. we want to joinFinals also when we minimize? (It seems like the current change would also alter how non-paradigms are compiled!)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

minimize() calls reverse() which calls joinFinals(), so there shouldn't be a change in behavior.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It calls reverse() twice, in fact, so it joins both finals and initials … I suppose no new (unjoined) final is added anywhere in that sequence, so it's safe then :)

current_paradigm = L"";
}
}
Expand Down
6 changes: 6 additions & 0 deletions lttoolbox/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ class Compiler
*/
wstring current_section;

/**
* Whether the current section or paradigm should be minimised
*/
bool current_minimise;

/**
* The direction of the compilation, 'lr' (left-to-right) or 'rl'
* (right-to-left)
Expand Down Expand Up @@ -325,6 +330,7 @@ class Compiler
LTTOOLBOX_IMPORTS static wstring const COMPILER_LEMMA_ATTR;
LTTOOLBOX_IMPORTS static wstring const COMPILER_IGNORE_ATTR;
LTTOOLBOX_IMPORTS static wstring const COMPILER_IGNORE_YES_VAL;
LTTOOLBOX_IMPORTS static wstring const COMPILER_REGEX_ATTR;
LTTOOLBOX_IMPORTS static wstring const COMPILER_ALT_ATTR;
LTTOOLBOX_IMPORTS static wstring const COMPILER_V_ATTR;
LTTOOLBOX_IMPORTS static wstring const COMPILER_VL_ATTR;
Expand Down
3 changes: 2 additions & 1 deletion lttoolbox/dix.dtd
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
<!-- paradigm definition -->
<!ATTLIST pardef
n CDATA #REQUIRED
regex (no|yes) #IMPLIED
>
<!-- n: paradigm name -->
<!ATTLIST pardef
Expand All @@ -56,7 +57,7 @@
<!-- dictionary section -->
<!ATTLIST section
id ID #REQUIRED
type (standard|inconditional|postblank|preblank) #REQUIRED
type (standard|inconditional|postblank|preblank|regex) #REQUIRED

>
<!-- id: dictionary section identifier -->
Expand Down
4 changes: 2 additions & 2 deletions lttoolbox/dix.rnc
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ attlist.pardefs &= empty
# paradigm definition section
pardef = element pardef { attlist.pardef, e+ }
# paradigm definition
attlist.pardef &= attribute n { text }
attlist.pardef &= attribute n { text }, attribute regex { "no" | "yes" }?
# n: paradigm name
attlist.pardef &= attribute c { text }?
# c: comment about paradigm
Expand All @@ -47,7 +47,7 @@ section = element section { attlist.section, e+ }
attlist.section &=
attribute id { xsd:ID },
attribute type {
"standard" | "inconditional" | "postblank" | "preblank"
"standard" | "inconditional" | "postblank" | "preblank" | "regex"
}
# id: dictionary section identifier

Expand Down
9 changes: 9 additions & 0 deletions lttoolbox/dix.rng
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,14 @@
<!-- paradigm definition -->
<define name="attlist.pardef" combine="interleave">
<attribute name="n"/>
<optional>
<attribute name="regex">
<choice>
<value>no</value>
<value>yes</value>
</choice>
</attribute>
</optional>
</define>
<!-- n: paradigm name -->
<define name="attlist.pardef" combine="interleave">
Expand Down Expand Up @@ -130,6 +138,7 @@
<value>inconditional</value>
<value>postblank</value>
<value>preblank</value>
<value>regex</value>
</choice>
</attribute>
</define>
Expand Down
3 changes: 2 additions & 1 deletion lttoolbox/fst_processor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -881,7 +881,8 @@ FSTProcessor::classifyFinals()
inconditional.insert(it->second.getFinals().begin(),
it->second.getFinals().end());
}
else if(endsWith(it->first, L"@standard"))
else if(endsWith(it->first, L"@standard") ||
endsWith(it->first, L"@regex"))
{
standard.insert(it->second.getFinals().begin(),
it->second.getFinals().end());
Expand Down
14 changes: 7 additions & 7 deletions lttoolbox/transducer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -319,18 +319,18 @@ Transducer::determinize(int const epsilon_tag)

int t = 0;

set<int> finals_state;
for(auto& it2 : finals) {
finals_state.insert(it2.first);
}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For our edification, can you explain why this block is moved?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It computes the set of final states of the original transducer. In its original location it was being recomputed on every iteration, even though it never changes during the loop.

while(size_Q_prime != Q_prime.size())
{
size_Q_prime = Q_prime.size();
R[(t+1)%2].clear();

for(auto& it : R[t])
{
set<int> finals_state;
for(auto& it2 : finals)
{
finals_state.insert(it2.first);
}
if(!isEmptyIntersection(Q_prime[it], finals_state))
{
double w = default_weight;
Expand Down Expand Up @@ -378,8 +378,8 @@ Transducer::determinize(int const epsilon_tag)
t = (t+1)%2;
}

transitions = transitions_prime;
finals = finals_prime;
transitions.swap(transitions_prime);
finals.swap(finals_prime);
initial = initial_prime;
}

Expand Down