Skip to content

Commit

Permalink
[R-package] add support for non-ASCII feature names (fixes #2983) (#3647)
Browse files Browse the repository at this point in the history

* [R-package] add support for non-ASCII feature names (fixes #2983)

* fix Windows
  • Loading branch information
jameslamb authored Jan 3, 2021
1 parent 85b9daa commit aae4fe4
Showing 1 changed file with 17 additions and 5 deletions.
22 changes: 17 additions & 5 deletions R-package/tests/testthat/test_basic.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Label the tests in this file as covering lightgbm()
context("lightgbm()")

# TRUE when R reports a Windows platform; read by tests whose expectations
# differ on Windows
ON_WINDOWS <- .Platform$OS.type == "windows"

# Load the agaricus train/test datasets shipped with the lightgbm package
data(agaricus.train, package = "lightgbm")
data(agaricus.test, package = "lightgbm")
# Short alias for the training dataset
train <- agaricus.train
Expand Down Expand Up @@ -1168,7 +1170,6 @@ test_that("lgb.train() works with early stopping for regression with a metric th


test_that("lgb.train() supports non-ASCII feature names", {
testthat::skip("UTF-8 feature names are not fully supported in the R package")
dtrain <- lgb.Dataset(
data = matrix(rnorm(400L), ncol = 4L)
, label = rnorm(100L)
Expand All @@ -1185,10 +1186,21 @@ test_that("lgb.train() supports non-ASCII feature names", {
)
expect_true(lgb.is.Booster(bst))
dumped_model <- jsonlite::fromJSON(bst$dump_model())
expect_identical(
dumped_model[["feature_names"]]
, feature_names
)

# UTF-8 strings are not well-supported on Windows, so on that platform the
# names read back from the dumped model are compared against the input names
# converted to UTF-8 with iconv(); everywhere else the input names are
# compared as-is
# * https://developer.r-project.org/Blog/public/2020/05/02/utf-8-support-on-windows/
# * https://developer.r-project.org/Blog/public/2020/07/30/windows/utf-8-build-of-r-and-cran-packages/index.html
expected_feature_names <- if (ON_WINDOWS) {
    iconv(feature_names, to = "UTF-8")
} else {
    feature_names
}
expect_identical(
    dumped_model[["feature_names"]]
    , expected_feature_names
)
})

test_that("when early stopping is not activated, best_iter and best_score come from valids and not training data", {
Expand Down

0 comments on commit aae4fe4

Please sign in to comment.