Skip to content

Commit

Permalink
Implement Suffix phonetic suggestion. Closes #2
Browse files Browse the repository at this point in the history
  • Loading branch information
mominul committed Nov 19, 2016
1 parent 8a5c019 commit 3064917
Show file tree
Hide file tree
Showing 6 changed files with 258 additions and 208 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Change Log
## Master
* Improve Phonetic support. Resolves issue [#2](https://github.com/OpenBangla/OpenBangla-Keyboard/issues/2). Now Phonetic support is mostly complete!
* A new button in TopBar for easily moving it.
* User can select candidate by clicking.
* These new layouts were added: **Munir Optima (uni)**, **Avro Easy**, **National (Jatiya)** and **Probhat**. Thanks to [OmicronLab](https://www.omicronlab.com/) because these layouts were also imported from [Avro Keyboard](https://www.omicronlab.com/avro-keyboard.html)!
Expand Down
4 changes: 2 additions & 2 deletions src/engine/libengine/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ add_library(libEngine
database.h
cachemanager.cpp
cachemanager.h
phoneticsuggestion.cpp
phoneticsuggestion.h
PhoneticSuggestion.cpp
PhoneticSuggestion.h
keycode.h)

target_link_libraries(libEngine libShared Qt5::Core Qt5::Sql)
2 changes: 1 addition & 1 deletion src/engine/libengine/MethodPhonetic.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

#include <QJsonObject>
#include "Layout.h"
#include "phoneticsuggestion.h"
#include "PhoneticSuggestion.h"

class MethodPhonetic : public LayoutMth {
bool handledKey;
Expand Down
244 changes: 244 additions & 0 deletions src/engine/libengine/PhoneticSuggestion.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
/*
* OpenBangla Keyboard
* Copyright (C) 2015-2016 Muhammad Mominul Huque <mominul2082@gmail.com>
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/

/*
* This suggestion making technic is highly inspaired from ibus-avro.
* For showing respect to ibus-avro project & Mehdi Hasan Khan, I am
* releasing this code as MPL 2.0 . So this code is now dual licensed
* under the MPL 2 and the GNU GPL 3 .
*/

#include <QRegularExpression>
#include "PhoneticSuggestion.h"
#include "qlevenshtein.hpp"

PhoneticSuggestion::PhoneticSuggestion() {}

void PhoneticSuggestion::setLayout(QJsonObject lay) {
parser.setLayout(lay);
}

QMap<QString, QString> PhoneticSuggestion::separatePadding(QString word) {
QMap<QString, QString> cutted;
QRegularExpression rgx("(^(?::`|\\.`|[-\\]\\\\~!@#&*()_=+\\[{}'\";<>/?|.,])*?(?=(?:,{2,}))|^(?::`|\\.`|[-\\]\\\\~!@#&*()_=+\\[{}'\";<>/?|.,])*)(.*?(?:,,)*)((?::`|\\.`|[-\\]\\\\~!@#&*()_=+\\[{}'\";<>/?|.,])*$)");
QRegularExpressionMatch match = rgx.match(word);
if(match.hasMatch()) {
cutted["prefix"] = match.captured(1);
cutted["term"] = match.captured(2);
cutted["suffix"] = match.captured(3);
}
return cutted;
}

bool PhoneticSuggestion::isKar(QString word) {
if (word.length() < 1) {
return false;
}

return word.left(1).contains(QRegularExpression("^[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u09c4]$"));
}

bool PhoneticSuggestion::isVowel(QString word) {
if (word.length() < 1) {
return false;
}

return word.left(1).contains(QRegularExpression("^[\u0985\u0986\u0987\u0988\u0989\u098a\u098b\u098f\u0990\u0993\u0994\u098c\u09e1\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc]$"));
}

void PhoneticSuggestion::appendIfNotContains(QVector<QString> &array, QString item) {
if (!array.contains(item)) {
array.append(item);
}
}

QVector<QString> PhoneticSuggestion::getSuggestion(QString word) {
QVector<QString> suggestion;
QVector<QString> dictSuggestion;
bool autoExact;

QString phonetic = parser.parse(PadMap["term"]);

// Add AutoCorrect
if (autodict.getCorrected(word) != "") {
// Exact, its a smily
if (autodict.getCorrected(word) == word) {
autoExact = true;
suggestion.append(autodict.getCorrected(word));
} else {
autoExact = false;
dictSuggestion.append(autodict.getCorrected(word));
}
} else {
// The whole word if not present. So search without padding.
if (autodict.getCorrected(PadMap["term"]) != "") {
autoExact = false;
dictSuggestion.append(autodict.getCorrected(PadMap["term"]));
}
}

// Add Dictionary suggestion
QString dictKey = PadMap["term"].toLower();
if (dictKey != "") {
dictSuggestion.append(db.find(dictKey));
// If the cache is empty fill it up with dictonary suggestion and autocorrect(for suffix)
if (phoneticCache[dictKey].isEmpty()) {
if (!dictSuggestion.isEmpty()) {
phoneticCache[dictKey] = dictSuggestion;
}
}
}


// Add suffix
QVector<QString> tempList;
QString fullWord;
int len = dictKey.length();

QVector<QString> rList;
if (!phoneticCache[dictKey].isEmpty()) {
rList = phoneticCache[dictKey];
}

if (len >= 2) {
for (int i = 1; i <= len; i++) {
QString testSuffix = dictKey.mid(i, len);

QString suffix = db.banglaForSuffix(testSuffix);
if(suffix != "") {
QString key = dictKey.mid(0, dictKey.length() - testSuffix.length());
if (!phoneticCache[key].isEmpty()) {
for (auto& cacheItem : phoneticCache[key]) {
QString cacheRightChar = cacheItem.right(1);
QString suffixLeftChar = suffix.left(1);
if (isVowel(cacheRightChar) && isKar(suffixLeftChar)) {
fullWord = cacheItem + "\u09DF" + suffix;
tempList.append(fullWord);
} else {
if (cacheRightChar == "\u09ce") {
fullWord = cacheItem.mid(0, cacheItem.length() - 1) + "\u09a4" + suffix;
tempList.append(fullWord);
} else if (cacheRightChar == "\u0982") {
fullWord = cacheItem.mid(0, cacheItem.length() - 1) + "\u0999" + suffix;
tempList.append(fullWord);
} else {
fullWord = cacheItem + suffix;
tempList.append(fullWord);
}
}
}

for (auto& item : tempList) {
rList.append(item);
}
}
}
}
}

// Sort it
std::sort(rList.begin(), rList.end(), [&] (QString i, QString j) {
int dist1 = levenshtein_distance(phonetic, i);
int dist2 = levenshtein_distance(phonetic, j);
if(dist1 < dist2) {
return i > j;
} else if(dist1 > dist2) {
return i < j;
} else {
return i < j;
}
});

QVector<QString> tempSuggestion;

for(auto& item : rList) {
appendIfNotContains(tempSuggestion, item);
}

// Add Clasic Phonetic
appendIfNotContains(tempSuggestion, phonetic);

// Add padding to all
for (auto& item : tempSuggestion) {
item = PadMap["prefix"] + item + PadMap["suffix"];
}

// Create suggestion
suggestion.append(tempSuggestion);

return suggestion;
}

/* TODO: Be sure that it works...
*/
QString PhoneticSuggestion::getPrevSelected() {
QString selected;
QString word = PadMap["term"];
int len = word.length();

if (cacheMan.getCandidateSelection(PadMap["term"]) != "") {
selected = cacheMan.getCandidateSelection(PadMap["term"]);
} else {
//Full word was not found, try checking without suffix
if (len >= 2) {
for (int j = 1; j < len; j++) {
QString testSuffix = word.right(j).toLower();

QString suffix = db.banglaForSuffix(testSuffix);
if (suffix != "") {
QString key = word.mid(0, word.length() - testSuffix.length());

if (cacheMan.getCandidateSelection(key) != "") {
QString keyWord = cacheMan.getCandidateSelection(key);
QString kwRightChar = keyWord.right(1);
QString suffixLeftChar = suffix.left(1);

if (isVowel(kwRightChar) && isKar(suffixLeftChar)) {
selected = keyWord + "\u09df" + suffix;
} else {
if (kwRightChar == "\u09ce") {
selected = keyWord.mid(0, keyWord.length() -1) + "\u09a4" + suffix;
} else if (kwRightChar == "\u0982") {
selected = keyWord.mid(0, keyWord.length() -1) + "\u0999" + suffix;
} else {
selected = keyWord + suffix;
}
}

break;
}
}
}
}
}

return PadMap["prefix"] + selected + PadMap["suffix"];
}

void PhoneticSuggestion::saveSelection(QString selected) {
QMap<QString, QString> mapTerm = separatePadding(selected);
cacheMan.writeCandidateSelection(PadMap["term"], mapTerm["term"]);
}

QVector<QString> PhoneticSuggestion::Suggest(QString cache) {
//Seperate begining and trailing padding characters, punctuations etc. from whole word
PadMap = separatePadding(cache);

//Convert begining and trailing padding text to phonetic Bangla
PadMap["prefix"] = parser.parse(PadMap["prefix"]);
PadMap["suffix"] = parser.parse(PadMap["suffix"]);

QVector<QString> candidates = getSuggestion(cache);
if(candidates.isEmpty()) {
candidates << PadMap["prefix"];
}


return candidates;
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,9 @@
* OpenBangla Keyboard
* Copyright (C) 2015-2016 Muhammad Mominul Huque <mominul2082@gmail.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/

#ifndef PHONETIC_SUGGESTION_H
Expand All @@ -35,14 +26,17 @@ class PhoneticSuggestion {
CacheManager cacheMan;

QMap<QString, QString> PadMap;
QMap<QString, QVector<QString>> phoneticCache;

QMap<QString, QString> separatePadding(QString word);
bool isKar(QString word);
bool isVowel(QString word);
QVector<QString> getSuggestion(QString word);
void appendIfNotContains(QVector<QString> &array, QString item);
public:
PhoneticSuggestion();
void setLayout(QJsonObject lay);

QMap<QString, QString> separatePadding(QString word);
//bool isKar(QString word);
//bool isVowel(QString word);
QVector<QString> getSuggestion(QString term);
QString getPrevSelected();
void saveSelection(QString selected);

Expand Down
Loading

0 comments on commit 3064917

Please sign in to comment.