Skip to content

Commit

Permalink
ICU-22100 Improve Japanese phrase breaking performance
Browse files Browse the repository at this point in the history
  • Loading branch information
allensu05 committed Jan 20, 2023
1 parent e26b2ad commit 63b0d17
Showing 1 changed file with 33 additions and 53 deletions.
86 changes: 33 additions & 53 deletions icu4c/source/common/mlbe.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ MlBreakEngine::~MlBreakEngine() {}

// File-local constants (anonymous namespace gives them internal linkage).
namespace {
// Sentinel element value: an elementList slot equal to INVALID is skipped when building features.
const char16_t INVALID = u'|';
// Capacity of the featureList array: one slot per feature type (UW1-UW6, BW1-BW3, TW1-TW4).
const int32_t MAX_FEATURE = 13;
}

int32_t MlBreakEngine::divideUpRange(UText *inText, int32_t rangeStart, int32_t rangeEnd,
Expand Down Expand Up @@ -135,83 +134,64 @@ void MlBreakEngine::evaluateBreakpoint(UChar32* elementList, int32_t index, int3
return;
}

UnicodeString featureList[MAX_FEATURE];
int32_t listLength = 0;
UnicodeString feature;
int32_t score = fNegativeSum;

if (elementList[0] != INVALID) {
featureList[listLength].append(u"UW1:", 4).append(elementList[0]);
listLength++;
// When the key doesn't exist, Hashtable.geti(key) returns 0 and 2 * 0 = 0,
// so there is no need to check whether fModel contains the feature key first.
score += (2 * fModel.geti(feature.setTo(u"UW1:", 4).append(elementList[0])));
}
if (elementList[1] != INVALID) {
featureList[listLength].append(u"UW2:", 4).append(elementList[1]);
listLength++;
score += (2 * fModel.geti(feature.setTo(u"UW2:", 4).append(elementList[1])));
}
if (elementList[2] != INVALID) {
featureList[listLength].append(u"UW3:", 4).append(elementList[2]);
listLength++;
score += (2 * fModel.geti(feature.setTo(u"UW3:", 4).append(elementList[2])));
}
if (elementList[3] != INVALID) {
featureList[listLength].append(u"UW4:", 4).append(elementList[3]);
listLength++;
score += (2 * fModel.geti(feature.setTo(u"UW4:", 4).append(elementList[3])));
}
if (elementList[4] != INVALID) {
featureList[listLength].append(u"UW5:", 4).append(elementList[4]);
listLength++;
score += (2 * fModel.geti(feature.setTo(u"UW5:", 4).append(elementList[4])));
}
if (elementList[5] != INVALID) {
featureList[listLength].append(u"UW6:", 4).append(elementList[5]);
listLength++;
score += (2 * fModel.geti(feature.setTo(u"UW6:", 4).append(elementList[5])));
}
if (elementList[1] != INVALID && elementList[2] != INVALID) {
featureList[listLength].append(u"BW1:", 4).append(elementList[1]).append(elementList[2]);
listLength++;
score += (2 * fModel.geti(
feature.setTo(u"BW1:", 4).append(elementList[1]).append(elementList[2])));
}
if (elementList[2] != INVALID && elementList[3] != INVALID) {
featureList[listLength].append(u"BW2:", 4).append(elementList[2]).append(elementList[3]);
listLength++;
score += (2 * fModel.geti(
feature.setTo(u"BW2:", 4).append(elementList[2]).append(elementList[3])));
}
if (elementList[3] != INVALID && elementList[4] != INVALID) {
featureList[listLength].append(u"BW3:", 4).append(elementList[3]).append(elementList[4]);
listLength++;
score += (2 * fModel.geti(
feature.setTo(u"BW3:", 4).append(elementList[3]).append(elementList[4])));
}
if (elementList[0] != INVALID && elementList[1] != INVALID && elementList[2] != INVALID) {
featureList[listLength]
.append(u"TW1:", 4)
.append(elementList[0])
.append(elementList[1])
.append(elementList[2]);
listLength++;
score += (2 * fModel.geti(feature.setTo(u"TW1:", 4)
.append(elementList[0])
.append(elementList[1])
.append(elementList[2])));
}
if (elementList[1] != INVALID && elementList[2] != INVALID && elementList[3] != INVALID) {
featureList[listLength]
.append(u"TW2:", 4)
.append(elementList[1])
.append(elementList[2])
.append(elementList[3]);
listLength++;
score += (2 * fModel.geti(feature.setTo(u"TW2:", 4)
.append(elementList[1])
.append(elementList[2])
.append(elementList[3])));
}
if (elementList[2] != INVALID && elementList[3] != INVALID && elementList[4] != INVALID) {
featureList[listLength]
.append(u"TW3:", 4)
.append(elementList[2])
.append(elementList[3])
.append(elementList[4]);
listLength++;
score += (2 * fModel.geti(feature.setTo(u"TW3:", 4)
.append(elementList[2])
.append(elementList[3])
.append(elementList[4])));
}
if (elementList[3] != INVALID && elementList[4] != INVALID && elementList[5] != INVALID) {
featureList[listLength]
.append(u"TW4:", 4)
.append(elementList[3])
.append(elementList[4])
.append(elementList[5]);
listLength++;
}

int32_t score = fNegativeSum;
for (int32_t j = 0; j < listLength; j++) {
// When the key doesn't exist, geti(key) returns 0 and 2 * 0 = 0.
// A missing key therefore adds nothing to the score, so there is no need to check
// whether fModel includes key featureList[j].
score += (2 * fModel.geti(featureList[j]));
score += (2 * fModel.geti(feature.setTo(u"TW4:", 4)
.append(elementList[3])
.append(elementList[4])
.append(elementList[5])));
}
if (score > 0) {
boundary.addElement(index, status);
Expand Down

0 comments on commit 63b0d17

Please sign in to comment.