diff --git a/R/cpp11.R b/R/cpp11.R index 621cead..56e60ec 100644 --- a/R/cpp11.R +++ b/R/cpp11.R @@ -32,6 +32,10 @@ kiwi_builder_add_pre_analyzed_word_ <- function(handle_ex, form, analyzed_r, sco .Call(`_elbird_kiwi_builder_add_pre_analyzed_word_`, handle_ex, form, analyzed_r, score) } +kiwi_builder_add_rule_ <- function(handle_ex, pos, pattern, replacement, score) { + .Call(`_elbird_kiwi_builder_add_rule_`, handle_ex, pos, pattern, replacement, score) +} + kiwi_builder_load_dict_ <- function(handle_ex, dict_path) { .Call(`_elbird_kiwi_builder_load_dict_`, handle_ex, dict_path) } diff --git a/src/cpp11.cpp b/src/cpp11.cpp index dcc81ff..ba58e5b 100644 --- a/src/cpp11.cpp +++ b/src/cpp11.cpp @@ -63,6 +63,13 @@ extern "C" SEXP _elbird_kiwi_builder_add_pre_analyzed_word_(SEXP handle_ex, SEXP END_CPP11 } // kiwi_bind.cpp +int kiwi_builder_add_rule_(SEXP handle_ex, const char* pos, std::string pattern, std::string replacement, float score); +extern "C" SEXP _elbird_kiwi_builder_add_rule_(SEXP handle_ex, SEXP pos, SEXP pattern, SEXP replacement, SEXP score) { + BEGIN_CPP11 + return cpp11::as_sexp(kiwi_builder_add_rule_(cpp11::as_cpp>(handle_ex), cpp11::as_cpp>(pos), cpp11::as_cpp>(pattern), cpp11::as_cpp>(replacement), cpp11::as_cpp>(score))); + END_CPP11 +} +// kiwi_bind.cpp int kiwi_builder_load_dict_(SEXP handle_ex, const char* dict_path); extern "C" SEXP _elbird_kiwi_builder_load_dict_(SEXP handle_ex, SEXP dict_path) { BEGIN_CPP11 @@ -139,6 +146,7 @@ static const R_CallMethodDef CallEntries[] = { {"_elbird_kiwi_analyze_", (DL_FUNC) &_elbird_kiwi_analyze_, 5}, {"_elbird_kiwi_builder_add_alias_word_", (DL_FUNC) &_elbird_kiwi_builder_add_alias_word_, 5}, {"_elbird_kiwi_builder_add_pre_analyzed_word_", (DL_FUNC) &_elbird_kiwi_builder_add_pre_analyzed_word_, 4}, + {"_elbird_kiwi_builder_add_rule_", (DL_FUNC) &_elbird_kiwi_builder_add_rule_, 5}, {"_elbird_kiwi_builder_add_word_", (DL_FUNC) &_elbird_kiwi_builder_add_word_, 4}, {"_elbird_kiwi_builder_build_", (DL_FUNC) &_elbird_kiwi_builder_build_, 1}, {"_elbird_kiwi_builder_close_", (DL_FUNC) &_elbird_kiwi_builder_close_, 1}, diff --git a/src/kiwi_bind.cpp b/src/kiwi_bind.cpp index dcda3e6..4dfef29 100644 --- a/src/kiwi_bind.cpp +++ b/src/kiwi_bind.cpp @@ -3,15 +3,13 @@ #include #include #include +#include #include using namespace cpp11; #include #include -typedef int(*kiwi_receiver_t)(int, kiwi_res_h, void*); -typedef int(*kiwi_builder_replacer_t)(const char*, int, char*, void*); - static std::map m = { { "URL", KIWI_MATCH_URL }, { "EMAIL", KIWI_MATCH_EMAIL }, @@ -29,7 +27,11 @@ static std::map m = { }; int match_options_(const std::string match_string) { - if (!m.count(match_string)) throw std::invalid_argument{ std::string{"Unknown Build Options : "} + match_string }; + if (!m.count(match_string)) { + throw std::invalid_argument{ + std::string{"Unknown Build Options : "} + match_string + }; + } return m.find(match_string)->second; } @@ -65,11 +67,11 @@ private : std::ifstream strm; }; -int readLines(int line, char* buffer, void* input) { - Scanner* scanner = (Scanner*)input; +int readLines(int line_num, char* buffer, void* user) { + Scanner* scanner = (Scanner*)user; if (buffer == nullptr) { - if (line == 0) { + if (line_num == 0) { scanner->rewind(); } @@ -83,6 +85,40 @@ int readLines(int line, char* buffer, void* input) { return 0; } +class Replacer { +public : + void init(const std::string pattern, const std::string replacemnet_) { + std::regex re(pattern); + this->rep = re; + this->replacemnet = replacemnet_; + }; + int size(const char* input) { + std::string output = std::regex_replace(std::string(input), + this->rep, + this->replacemnet); + this->res = output; + return strlen(output.c_str())+1; + }; + const char* text() { + return this->res.c_str(); + }; + +private : + std::regex rep; + std::string replacemnet = ""; + std::string res; +}; + +int ruleprovider(const char* input, int size, char* buffer, void* user) { + Replacer* rpcr = (Replacer*)user; + if (buffer == nullptr) { + return rpcr->size(input); + } + strcpy(buffer, rpcr->text()); + return 0; +} + + [[cpp11::register]] std::string kiwi_version_() { return kiwi_version(); @@ -174,9 +210,17 @@ int kiwi_builder_add_pre_analyzed_word_( ); } -int kiwi_builder_add_rule_(SEXP handle_ex, const char* pos, kiwi_builder_replacer_t replacer, void* user_data, float score) { +[[cpp11::register]] +int kiwi_builder_add_rule_( + SEXP handle_ex, + const char* pos, + std::string pattern, + std::string replacement, + float score) { cpp11::external_pointer handle(handle_ex); - return kiwi_builder_add_rule(handle.get(), pos, replacer, user_data, score); + Replacer rpcr; + rpcr.init(pattern, replacement); + return kiwi_builder_add_rule(handle.get(), pos, ruleprovider, &rpcr, score); } [[cpp11::register]] @@ -325,7 +369,11 @@ SEXP kiwi_analyze_( } [[cpp11::register]] -SEXP kiwi_split_into_sents_(SEXP handle_ex, const char* text, int match_options, bool return_tokens) { +SEXP kiwi_split_into_sents_( + SEXP handle_ex, + const char* text, + int match_options, + bool return_tokens) { cpp11::external_pointer handle(handle_ex); kiwi_res_h tokenized_res; kiwi_res_h *tknptr = &tokenized_res; diff --git a/tests/testthat/test-addrule.R b/tests/testthat/test-addrule.R new file mode 100644 index 0000000..2334150 --- /dev/null +++ b/tests/testthat/test-addrule.R @@ -0,0 +1,17 @@ +test_that("add rule works", { + skip_if_offline() + if (!model_works("small")) + get_model("small") + + kw <- kiwi_init_(kiwi_model_path_full("small"), 0, BuildOpt$DEFAULT) + res1<- kiwi_analyze_wrap(kw, "했어요! 하잖아요! 할까요?", top_n = 1) + res2<- kiwi_analyze_wrap(kw, "했어용! 하잖아용! 할까용?", top_n = 1) + + expect_false(identical(res1[[1]]$Score, res2[[1]]$Score)) + + kb <- kiwi_builder_init_(kiwi_model_path_full("small"), 0, BuildOpt$DEFAULT) + kiwi_builder_add_rule_(kb, "ef", "요$", "용", -2) + kw <- kiwi_builder_build_(kb) + res<- kiwi_analyze_wrap(kw, "했어용! 하잖아용! 할까용?") + expect_false(identical(res1[[1]]$Score, res[[1]]$Score)) +}) diff --git a/tests/testthat/test-preanalyze.R b/tests/testthat/test-preanalyze.R index f1682dd..59eb580 100644 --- a/tests/testthat/test-preanalyze.R +++ b/tests/testthat/test-preanalyze.R @@ -1,6 +1,7 @@ test_that("pre analyze words", { skip_if_offline() - get_model("small") + if (!model_works("small")) + get_model("small") anl <- data.frame( morphs = c("팅기", "었", "어"), @@ -22,6 +23,4 @@ test_that("pre analyze words", { res <- kiwi_analyze_wrap(kw, text = "팅겼어...", 1, Match$ALL_WITH_NORMALIZING) expect_equal(res[[1]]$Token[[1]]$form, "팅기") expect_equal(res[[1]]$Token[[1]]$tag, "VV") - rm(kw) - rm(kb) })