[SPARK-20544][SPARKR] R wrapper for input_file_name #17818

Closed · wants to merge 5 commits
1 change: 1 addition & 0 deletions R/pkg/NAMESPACE
@@ -257,6 +257,7 @@ exportMethods("%<=>%",
"hypot",
"ifelse",
"initcap",
"input_file_name",
"instr",
"isNaN",
"isNotNull",
20 changes: 20 additions & 0 deletions R/pkg/R/functions.R
@@ -3974,3 +3974,23 @@ setMethod("grouping_id",
            jc <- callJStatic("org.apache.spark.sql.functions", "grouping_id", jcols)
            column(jc)
          })

#' input_file_name
#'
#' Creates a string column for the file name of the current Spark task.
Member: I actually find this description in the Scala API quite confusing - what is a "Spark task", and how does it have a "file name"?

Member Author: How about the new one?

#'
#' @rdname input_file_name
#' @name input_file_name
#' @aliases input_file_name,missing-method
Member: actually, could you add @family normal_funcs here? I missed this earlier and in the other PR.

Member Author: Done.

#' @export
#' @examples \dontrun{
#' df <- read.text("README.md")
#'
#' head(select(df, input_file_name()))
#' }
#' @note input_file_name since 2.3.0
setMethod("input_file_name", signature("missing"),
          function() {
            jc <- callJStatic("org.apache.spark.sql.functions", "input_file_name")
            column(jc)
          })
6 changes: 6 additions & 0 deletions R/pkg/R/generics.R
@@ -1076,6 +1076,12 @@ setGeneric("hypot", function(y, x) { standardGeneric("hypot") })
#' @export
setGeneric("initcap", function(x) { standardGeneric("initcap") })

#' @param x empty. Should be used with no argument.
#' @rdname input_file_name
#' @export
setGeneric("input_file_name",
           function(x = "missing") { standardGeneric("input_file_name") })

#' @rdname instr
#' @export
setGeneric("instr", function(y, x) { standardGeneric("instr") })
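
The defaulted x = "missing" argument follows the SparkR convention for column functions that take no arguments: S4 dispatch treats an unsupplied argument as class "missing", so the method registered on signature("missing") in functions.R is selected when input_file_name() is called with no argument. A minimal, self-contained sketch of the same dispatch pattern, using a hypothetical generic name (plain R, no Spark required):

library(methods)

# Hypothetical generic following the same pattern as input_file_name above.
setGeneric("demo_no_arg",
           function(x = "missing") { standardGeneric("demo_no_arg") })

# Method registered for the case where the argument is not supplied.
setMethod("demo_no_arg", signature("missing"),
          function() {
            "dispatched via the 'missing' signature"
          })

demo_no_arg()   # returns "dispatched via the 'missing' signature"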
5 changes: 5 additions & 0 deletions R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -1366,6 +1366,11 @@ test_that("column functions", {
  expect_equal(collect(df2)[[3, 1]], FALSE)
  expect_equal(collect(df2)[[3, 2]], TRUE)

  # Test input_file_name()
  actual_names <- sort(collect(distinct(select(df, input_file_name()))))
  expect_equal(length(actual_names), 1)
  expect_equal(basename(actual_names[1, 1]), basename(jsonPath))

  df3 <- select(df, between(df$name, c("Apache", "Spark")))
  expect_equal(collect(df3)[[1, 1]], TRUE)
  expect_equal(collect(df3)[[2, 1]], FALSE)