From be5210fe14e81f46e30d58a011a2d756e019a206 Mon Sep 17 00:00:00 2001 From: "Chanyub.Park" Date: Sun, 10 Apr 2022 07:44:26 +0000 Subject: [PATCH 1/6] add minimal replacer --- src/kiwi_bind.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/kiwi_bind.cpp b/src/kiwi_bind.cpp index dcda3e6..a0815ab 100644 --- a/src/kiwi_bind.cpp +++ b/src/kiwi_bind.cpp @@ -9,7 +9,6 @@ using namespace cpp11; #include #include -typedef int(*kiwi_receiver_t)(int, kiwi_res_h, void*); typedef int(*kiwi_builder_replacer_t)(const char*, int, char*, void*); static std::map m = { @@ -65,11 +64,15 @@ private : std::ifstream strm; }; -int readLines(int line, char* buffer, void* input) { - Scanner* scanner = (Scanner*)input; +int replacers(const char* input, int len, char* buffer, void* user) { + return 1; +} + +int readLines(int line_num, char* buffer, void* user) { + Scanner* scanner = (Scanner*)user; if (buffer == nullptr) { - if (line == 0) { + if (line_num == 0) { scanner->rewind(); } From 9bc616216504b33449a6ac1db7a0c9640dbbc0a8 Mon Sep 17 00:00:00 2001 From: "Chanyub.Park" Date: Mon, 11 Apr 2022 05:23:31 +0000 Subject: [PATCH 2/6] work check done need to test, wrap --- R/cpp11.R | 8 +++++ src/cpp11.cpp | 16 +++++++++ src/kiwi_bind.cpp | 61 +++++++++++++++++++++++++++----- tests/testthat/test-addrule.R | 11 ++++++ tests/testthat/test-preanalyze.R | 2 -- 5 files changed, 88 insertions(+), 10 deletions(-) create mode 100644 tests/testthat/test-addrule.R diff --git a/R/cpp11.R b/R/cpp11.R index 621cead..1d70826 100644 --- a/R/cpp11.R +++ b/R/cpp11.R @@ -1,5 +1,9 @@ # Generated by cpp11: do not edit by hand +function_r <- function(func) { + .Call(`_elbird_function_r`, func) +} + kiwi_version_ <- function() { .Call(`_elbird_kiwi_version_`) } @@ -32,6 +36,10 @@ kiwi_builder_add_pre_analyzed_word_ <- function(handle_ex, form, analyzed_r, sco .Call(`_elbird_kiwi_builder_add_pre_analyzed_word_`, handle_ex, form, analyzed_r, score) } +kiwi_builder_add_rule_ <- function(handle_ex, pos, pattern, replacement, score) { + .Call(`_elbird_kiwi_builder_add_rule_`, handle_ex, pos, pattern, replacement, score) +} + kiwi_builder_load_dict_ <- function(handle_ex, dict_path) { .Call(`_elbird_kiwi_builder_load_dict_`, handle_ex, dict_path) } diff --git a/src/cpp11.cpp b/src/cpp11.cpp index dcc81ff..06265f0 100644 --- a/src/cpp11.cpp +++ b/src/cpp11.cpp @@ -5,6 +5,13 @@ #include "cpp11/declarations.hpp" #include +// kiwi_bind.cpp +SEXP function_r(cpp11::function func); +extern "C" SEXP _elbird_function_r(SEXP func) { + BEGIN_CPP11 + return cpp11::as_sexp(function_r(cpp11::as_cpp>(func))); + END_CPP11 +} // kiwi_bind.cpp std::string kiwi_version_(); extern "C" SEXP _elbird_kiwi_version_() { @@ -63,6 +70,13 @@ extern "C" SEXP _elbird_kiwi_builder_add_pre_analyzed_word_(SEXP handle_ex, SEXP END_CPP11 } // kiwi_bind.cpp +int kiwi_builder_add_rule_(SEXP handle_ex, const char* pos, std::string pattern, std::string replacement, float score); +extern "C" SEXP _elbird_kiwi_builder_add_rule_(SEXP handle_ex, SEXP pos, SEXP pattern, SEXP replacement, SEXP score) { + BEGIN_CPP11 + return cpp11::as_sexp(kiwi_builder_add_rule_(cpp11::as_cpp>(handle_ex), cpp11::as_cpp>(pos), cpp11::as_cpp>(pattern), cpp11::as_cpp>(replacement), cpp11::as_cpp>(score))); + END_CPP11 +} +// kiwi_bind.cpp int kiwi_builder_load_dict_(SEXP handle_ex, const char* dict_path); extern "C" SEXP _elbird_kiwi_builder_load_dict_(SEXP handle_ex, SEXP dict_path) { BEGIN_CPP11 @@ -136,9 +150,11 @@ extern "C" SEXP _elbird_kiwi_split_into_sents_(SEXP handle_ex, SEXP text, SEXP m extern "C" { static const R_CallMethodDef CallEntries[] = { + {"_elbird_function_r", (DL_FUNC) &_elbird_function_r, 1}, {"_elbird_kiwi_analyze_", (DL_FUNC) &_elbird_kiwi_analyze_, 5}, {"_elbird_kiwi_builder_add_alias_word_", (DL_FUNC) &_elbird_kiwi_builder_add_alias_word_, 5}, {"_elbird_kiwi_builder_add_pre_analyzed_word_", (DL_FUNC) &_elbird_kiwi_builder_add_pre_analyzed_word_, 4}, + {"_elbird_kiwi_builder_add_rule_", (DL_FUNC) &_elbird_kiwi_builder_add_rule_, 5}, {"_elbird_kiwi_builder_add_word_", (DL_FUNC) &_elbird_kiwi_builder_add_word_, 4}, {"_elbird_kiwi_builder_build_", (DL_FUNC) &_elbird_kiwi_builder_build_, 1}, {"_elbird_kiwi_builder_close_", (DL_FUNC) &_elbird_kiwi_builder_close_, 1}, diff --git a/src/kiwi_bind.cpp b/src/kiwi_bind.cpp index a0815ab..a229087 100644 --- a/src/kiwi_bind.cpp +++ b/src/kiwi_bind.cpp @@ -3,13 +3,21 @@ #include #include #include +#include #include using namespace cpp11; #include #include -typedef int(*kiwi_builder_replacer_t)(const char*, int, char*, void*); +[[cpp11::register]] +SEXP function_r(cpp11::function func){ + // cpp11::writable::strings x; + // x.push_back("test"); + auto res = func("안녕하세요"); + return res; +}; + static std::map m = { { "URL", KIWI_MATCH_URL }, @@ -28,7 +36,11 @@ static std::map m = { }; int match_options_(const std::string match_string) { - if (!m.count(match_string)) throw std::invalid_argument{ std::string{"Unknown Build Options : "} + match_string }; + if (!m.count(match_string)) { + throw std::invalid_argument{ + std::string{"Unknown Build Options : "} + match_string + }; + } return m.find(match_string)->second; } @@ -64,10 +76,6 @@ private : std::ifstream strm; }; -int replacers(const char* input, int len, char* buffer, void* user) { - return 1; -} - int readLines(int line_num, char* buffer, void* user) { Scanner* scanner = (Scanner*)user; @@ -86,6 +94,40 @@ int readLines(int line_num, char* buffer, void* user) { return 0; } +class Replacer { +public : + void init(const std::string pattern, const std::string replacemnet_) { + std::regex re(pattern); + this->rep = re; + this->replacemnet = replacemnet_; + }; + int size(const char* input) { + std::string output = std::regex_replace(std::string(input), this->rep, this->replacemnet); + this->res = output; + return strlen(output.c_str())+1; + }; + const char* text() { + return this->res.c_str(); + }; + +private : + std::regex rep; + std::string replacemnet = ""; + std::string res; +}; + +int ruleprovider(const char* input, int size, char* buffer, void* user) { + Replacer* rpcr = (Replacer*)user; + if (buffer == nullptr) { + std::cout << input << std::endl; + return rpcr->size(input); + } + std::cout << rpcr->text() << std::endl; + strcpy(buffer, rpcr->text()); + return 0; +} + + [[cpp11::register]] std::string kiwi_version_() { return kiwi_version(); @@ -177,9 +219,12 @@ int kiwi_builder_add_pre_analyzed_word_( ); } -int kiwi_builder_add_rule_(SEXP handle_ex, const char* pos, kiwi_builder_replacer_t replacer, void* user_data, float score) { +[[cpp11::register]] +int kiwi_builder_add_rule_(SEXP handle_ex, const char* pos, std::string pattern, std::string replacement, float score) { cpp11::external_pointer handle(handle_ex); - return kiwi_builder_add_rule(handle.get(), pos, replacer, user_data, score); + Replacer rpcr; + rpcr.init(pattern, replacement); + return kiwi_builder_add_rule(handle.get(), pos, ruleprovider, &rpcr, score); } [[cpp11::register]] diff --git a/tests/testthat/test-addrule.R b/tests/testthat/test-addrule.R new file mode 100644 index 0000000..3fdd3cd --- /dev/null +++ b/tests/testthat/test-addrule.R @@ -0,0 +1,11 @@ +test_that("add rule works", { + skip_if_offline() + get_model("small") + kw <- kiwi_init_(kiwi_model_path_full("small"), 0, BuildOpt$DEFAULT) + res<- kiwi_analyze_wrap(kw, "했어요! 하잖아요! 할까요?") + res<- kiwi_analyze_wrap(kw, "했어요! 하잖아요! 할까요?") + kb <- kiwi_builder_init_(kiwi_model_path_full("small"), 0, BuildOpt$DEFAULT) + kiwi_builder_add_rule_(kb, "ef", "요$", "용", -2) + kw <- kiwi_builder_build_(kb) + res<- kiwi_analyze_wrap(kw, "했어용! 하잖아용! 할까용?") +}) diff --git a/tests/testthat/test-preanalyze.R b/tests/testthat/test-preanalyze.R index f1682dd..64d9cb9 100644 --- a/tests/testthat/test-preanalyze.R +++ b/tests/testthat/test-preanalyze.R @@ -22,6 +22,4 @@ test_that("pre analyze words", { res <- kiwi_analyze_wrap(kw, text = "팅겼어...", 1, Match$ALL_WITH_NORMALIZING) expect_equal(res[[1]]$Token[[1]]$form, "팅기") expect_equal(res[[1]]$Token[[1]]$tag, "VV") - rm(kw) - rm(kb) }) From 7d02c6d0cff16b32d1f75fb53736b33a5782bb4a Mon Sep 17 00:00:00 2001 From: "Chanyub.Park" Date: Mon, 11 Apr 2022 05:56:55 +0000 Subject: [PATCH 3/6] rm print test set --- src/kiwi_bind.cpp | 1 - tests/testthat/test-addrule.R | 8 ++++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/kiwi_bind.cpp b/src/kiwi_bind.cpp index a229087..b3e3403 100644 --- a/src/kiwi_bind.cpp +++ b/src/kiwi_bind.cpp @@ -119,7 +119,6 @@ private : int ruleprovider(const char* input, int size, char* buffer, void* user) { Replacer* rpcr = (Replacer*)user; if (buffer == nullptr) { - std::cout << input << std::endl; return rpcr->size(input); } std::cout << rpcr->text() << std::endl; diff --git a/tests/testthat/test-addrule.R b/tests/testthat/test-addrule.R index 3fdd3cd..b75a000 100644 --- a/tests/testthat/test-addrule.R +++ b/tests/testthat/test-addrule.R @@ -2,10 +2,14 @@ test_that("add rule works", { skip_if_offline() get_model("small") kw <- kiwi_init_(kiwi_model_path_full("small"), 0, BuildOpt$DEFAULT) - res<- kiwi_analyze_wrap(kw, "했어요! 하잖아요! 할까요?") - res<- kiwi_analyze_wrap(kw, "했어요! 하잖아요! 할까요?") + res1<- kiwi_analyze_wrap(kw, "했어요! 하잖아요! 할까요?", top_n = 1) + res2<- kiwi_analyze_wrap(kw, "했어용! 하잖아용! 할까용?", top_n = 1) + + expect_false(identical(res1[[1]]$Score, res2[[1]]$Score)) + kb <- kiwi_builder_init_(kiwi_model_path_full("small"), 0, BuildOpt$DEFAULT) kiwi_builder_add_rule_(kb, "ef", "요$", "용", -2) kw <- kiwi_builder_build_(kb) res<- kiwi_analyze_wrap(kw, "했어용! 하잖아용! 할까용?") + expect_false(identical(res1[[1]]$Score, res[[1]]$Score)) }) From d3564a35b772109f70af7564ee980a9e741e7ed3 Mon Sep 17 00:00:00 2001 From: "Chanyub.Park" Date: Mon, 11 Apr 2022 07:36:20 +0000 Subject: [PATCH 4/6] rm print --- src/kiwi_bind.cpp | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/src/kiwi_bind.cpp b/src/kiwi_bind.cpp index b3e3403..a2300f4 100644 --- a/src/kiwi_bind.cpp +++ b/src/kiwi_bind.cpp @@ -10,15 +10,6 @@ using namespace cpp11; #include #include -[[cpp11::register]] -SEXP function_r(cpp11::function func){ - // cpp11::writable::strings x; - // x.push_back("test"); - auto res = func("안녕하세요"); - return res; -}; - - static std::map m = { { "URL", KIWI_MATCH_URL }, { "EMAIL", KIWI_MATCH_EMAIL }, @@ -102,7 +93,9 @@ public : this->replacemnet = replacemnet_; }; int size(const char* input) { - std::string output = std::regex_replace(std::string(input), this->rep, this->replacemnet); + std::string output = std::regex_replace(std::string(input), + this->rep, + this->replacemnet); this->res = output; return strlen(output.c_str())+1; }; @@ -121,7 +114,6 @@ int ruleprovider(const char* input, int size, char* buffer, void* user) { if (buffer == nullptr) { return rpcr->size(input); } - std::cout << rpcr->text() << std::endl; strcpy(buffer, rpcr->text()); return 0; } From e101ed051c8cd7c0ddd0340425952bf566920b1c Mon Sep 17 00:00:00 2001 From: "Chanyub.Park" Date: Mon, 11 Apr 2022 07:42:54 +0000 Subject: [PATCH 5/6] rm test function --- R/cpp11.R | 4 ---- src/cpp11.cpp | 8 -------- 2 files changed, 12 deletions(-) diff --git a/R/cpp11.R b/R/cpp11.R index 1d70826..56e60ec 100644 --- a/R/cpp11.R +++ b/R/cpp11.R @@ -1,9 +1,5 @@ # Generated by cpp11: do not edit by hand -function_r <- function(func) { - .Call(`_elbird_function_r`, func) -} - kiwi_version_ <- function() { .Call(`_elbird_kiwi_version_`) } diff --git a/src/cpp11.cpp b/src/cpp11.cpp index 06265f0..ba58e5b 100644 --- a/src/cpp11.cpp +++ b/src/cpp11.cpp @@ -5,13 +5,6 @@ #include "cpp11/declarations.hpp" #include -// kiwi_bind.cpp -SEXP function_r(cpp11::function func); -extern "C" SEXP _elbird_function_r(SEXP func) { - BEGIN_CPP11 - return cpp11::as_sexp(function_r(cpp11::as_cpp>(func))); - END_CPP11 -} // kiwi_bind.cpp std::string kiwi_version_(); extern "C" SEXP _elbird_kiwi_version_() { @@ -150,7 +143,6 @@ extern "C" SEXP _elbird_kiwi_split_into_sents_(SEXP handle_ex, SEXP text, SEXP m extern "C" { static const R_CallMethodDef CallEntries[] = { - {"_elbird_function_r", (DL_FUNC) &_elbird_function_r, 1}, {"_elbird_kiwi_analyze_", (DL_FUNC) &_elbird_kiwi_analyze_, 5}, {"_elbird_kiwi_builder_add_alias_word_", (DL_FUNC) &_elbird_kiwi_builder_add_alias_word_, 5}, {"_elbird_kiwi_builder_add_pre_analyzed_word_", (DL_FUNC) &_elbird_kiwi_builder_add_pre_analyzed_word_, 4}, From 2b4d05880b64083de78e9d75824dce1600e2f784 Mon Sep 17 00:00:00 2001 From: "Chanyub.Park" Date: Mon, 11 Apr 2022 09:02:17 +0000 Subject: [PATCH 6/6] inhence test --- src/kiwi_bind.cpp | 13 +++++++++++-- tests/testthat/test-addrule.R | 4 +++- tests/testthat/test-preanalyze.R | 3 ++- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/kiwi_bind.cpp b/src/kiwi_bind.cpp index a2300f4..4dfef29 100644 --- a/src/kiwi_bind.cpp +++ b/src/kiwi_bind.cpp @@ -211,7 +211,12 @@ int kiwi_builder_add_pre_analyzed_word_( } [[cpp11::register]] -int kiwi_builder_add_rule_(SEXP handle_ex, const char* pos, std::string pattern, std::string replacement, float score) { +int kiwi_builder_add_rule_( + SEXP handle_ex, + const char* pos, + std::string pattern, + std::string replacement, + float score) { cpp11::external_pointer handle(handle_ex); Replacer rpcr; rpcr.init(pattern, replacement); @@ -364,7 +369,11 @@ SEXP kiwi_analyze_( } [[cpp11::register]] -SEXP kiwi_split_into_sents_(SEXP handle_ex, const char* text, int match_options, bool return_tokens) { +SEXP kiwi_split_into_sents_( + SEXP handle_ex, + const char* text, + int match_options, + bool return_tokens) { cpp11::external_pointer handle(handle_ex); kiwi_res_h tokenized_res; kiwi_res_h *tknptr = &tokenized_res; diff --git a/tests/testthat/test-addrule.R b/tests/testthat/test-addrule.R index b75a000..2334150 100644 --- a/tests/testthat/test-addrule.R +++ b/tests/testthat/test-addrule.R @@ -1,6 +1,8 @@ test_that("add rule works", { skip_if_offline() - get_model("small") + if (!model_works("small")) + get_model("small") + kw <- kiwi_init_(kiwi_model_path_full("small"), 0, BuildOpt$DEFAULT) res1<- kiwi_analyze_wrap(kw, "했어요! 하잖아요! 할까요?", top_n = 1) res2<- kiwi_analyze_wrap(kw, "했어용! 하잖아용! 할까용?", top_n = 1) diff --git a/tests/testthat/test-preanalyze.R b/tests/testthat/test-preanalyze.R index 64d9cb9..59eb580 100644 --- a/tests/testthat/test-preanalyze.R +++ b/tests/testthat/test-preanalyze.R @@ -1,6 +1,7 @@ test_that("pre analyze words", { skip_if_offline() - get_model("small") + if (!model_works("small")) + get_model("small") anl <- data.frame( morphs = c("팅기", "었", "어"),