frictionlessdata · PietrH · Aug 3, 2023 · Aug 3, 2023 · Aug 3, 2023 · Aug 3, 2023
diff --git a/R/read_resource.R b/R/read_resource.R
@@ -352,11 +352,11 @@ read_resource <- function(package, resource_name, col_select = NULL) {
   # Read data directly
   if (resource$read_from == "df") {
     df <- dplyr::as_tibble(resource$data)
-
+    data_col_names <- colnames(df)
   # Read data from data
   } else if (resource$read_from == "data") {
     df <- dplyr::as_tibble(do.call(rbind.data.frame, resource$data))
-
+    data_col_names <- colnames(df)
   # Read data from path(s)
   } else if (resource$read_from == "path" || resource$read_from == "url") {
     dataframes <- list()
@@ -391,7 +391,41 @@ read_resource <- function(package, resource_name, col_select = NULL) {
     }
     # Merge data frames for all paths
     df <- dplyr::bind_rows(dataframes)
+    # Read header from first file
+    data_col_names <-
+      readr::read_lines(file = paths[1], n_max = 1) %>%
+      I() %>%
+      readr::read_delim(delim = replace_null(dialect$delimiter, ","),
+                        quote = replace_null(dialect$quoteChar, "\""),
+                        escape_backslash = ifelse(
+                          replace_null(dialect$escapeChar, "not set") == "\\", TRUE, FALSE
+                        ),
+                        escape_double = ifelse(
+                          # if escapeChar is set, set doubleQuote to FALSE (mutually exclusive)
+                          replace_null(dialect$escapeChar, "not set") == "\\",
+                          FALSE,
+                          replace_null(dialect$doubleQuote, TRUE)
+                        ),
+                        locale = locale,
+                        na = replace_null(schema$missingValues, ""),
+                        comment = replace_null(dialect$commentChar, ""),
+                        trim_ws = replace_null(dialect$skipInitialSpace, FALSE),
+                        col_names = FALSE,
+                        show_col_types = FALSE) %>%
+      dplyr::slice_head(n = 1) %>%
+      unlist(use.names = FALSE)
   }
+  # compare df header to schema
+  assertthat::assert_that(
+    identical(tolower(col_names), tolower(data_col_names)),
+    msg = glue::glue(
+      "Field names in `schema` must match column names in data:",
+      "\u2139 Field names: `{field_names_collapse}`",
+      "\u2139 Column names in data: `{data_col_names_collapse}`",
+      .sep = "\n",
+      field_names_collapse = glue::glue_collapse(col_names, sep = ", "),
+      data_col_names_collapse = glue::glue_collapse(data_col_names, sep = ", ")
+    ))
 
   return(df)
 }
diff --git a/tests/testthat/test-read_resource.R b/tests/testthat/test-read_resource.R
@@ -95,6 +95,110 @@ test_that("read_resource() returns error on column selection not in schema", {
   )
 })
 
+test_that("read_resource() returns error on missing columns in data", {
+  temp_package_dir <- file.path(tempdir(), "missing_cols_package")
+  # Register cleanup first, so the directory is removed even if setup fails
+  on.exit(unlink(temp_package_dir, recursive = TRUE), add = TRUE)
+  # Copy the example Data Package to a temporary directory
+  dir.create(temp_package_dir)
+  extdata_files <- list.files(
+    system.file("extdata", package = "frictionless"), full.names = TRUE
+  )
+  file.copy(from = extdata_files, to = temp_package_dir)
+  # Remove `start` from the data, while datapackage.json is left unchanged
+  deployments_path <- file.path(temp_package_dir, "deployments.csv")
+  readr::read_csv(
+    deployments_path,
+    col_select = -start,
+    show_col_types = FALSE
+  ) %>%
+    readr::write_csv(deployments_path)
+  # Read the new package
+  missing_cols_package <- suppressMessages(
+    read_package(file.path(temp_package_dir, "datapackage.json"))
+  )
+  # The data header now lacks a column, so reading the resource must fail
+  expect_error(
+    read_resource(missing_cols_package, "deployments"),
+    regexp = "must match column names in data"
+  )
+})
+
+test_that("read_resource() returns error on extra columns in data", {
+  temp_package_dir <- file.path(tempdir(), "extra_cols_package")
+  # Register cleanup first, so the directory is removed even if a setup step
+  # below fails
+  on.exit(unlink(temp_package_dir, recursive = TRUE), add = TRUE)
+  # Copy the example Data Package to a temporary directory
+  dir.create(temp_package_dir)
+  extdata_files <- list.files(
+    system.file("extdata", package = "frictionless"), full.names = TRUE
+  )
+  file.copy(from = extdata_files, to = temp_package_dir)
+  # Add a column to the data, while datapackage.json is left unchanged.
+  # `dplyr::n()` sizes it to the data instead of assuming a fixed row count
+  deployments_path <- file.path(temp_package_dir, "deployments.csv")
+  readr::read_csv(deployments_path, show_col_types = FALSE) %>%
+    dplyr::mutate(random_column = runif(n = dplyr::n())) %>%
+    readr::write_csv(deployments_path)
+  # Read the new package
+  extra_cols_package <- suppressMessages(
+    read_package(file.path(temp_package_dir, "datapackage.json"))
+  )
+  # The data header now has an additional column, so reading the resource
+  # must fail
+  expect_error(
+    read_resource(extra_cols_package, "deployments"),
+    regexp = "must match column names in data"
+  )
+})
+
+test_that("read_resource() returns error on missing columns in schema", {
+  # Copy the example package and drop the third field (`timestamp`) from the
+  # schema of its second resource (observations)
+  pkg <- example_package
+  observation_fields <- pkg$resources[[2]]$schema$fields
+  pkg$resources[[2]]$schema$fields <- observation_fields[-3]
+  # Reading the resource with the reduced schema must fail
+  expect_error(
+    read_resource(pkg, "observations"),
+    regexp = "must match column names in data"
+  )
+})
+
+test_that("read_resource() returns error on column order mismatch with schema", {
+  # Copy the example package and reorder the fields in the schema of its first
+  # resource (deployments), while the data stay untouched
+  wrong_order_in_schema_pkg <- example_package
+  fields <- wrong_order_in_schema_pkg$resources[[1]]$schema$fields
+  # This is not the order the columns have in the data!
+  wrong_order_in_schema_pkg$resources[[1]]$schema$fields <-
+    fields[c(5, 1, 4, 3, 2)]
+  # The set of field names is unchanged, only their order differs: reading
+  # the resource must still fail
+  expect_error(
+    read_resource(wrong_order_in_schema_pkg, "deployments"),
+    regexp = "must match column names in data"
+  )
+})
+
+test_that("read_resource() doesn't compare header when dialect$header is null", {
+  skip("TODO: header will not be compared, and will be skipped when reading")
+})
+
+test_that("read_resource() allows case mismatch between schema and data", {
+  # Copy the example package and upper-case the name of the third field in the
+  # schema of its second resource (observations)
+  pkg <- example_package
+  field_name <- pkg$resources[[2]]$schema$fields[[3]]$name
+  pkg$resources[[2]]$schema$fields[[3]]$name <- toupper(field_name)
+  # A difference in case only must not prevent the resource from being read
+  expect_s3_class(
+    read_resource(pkg, "observations"),
+    "tbl"
+  )
+})
+
 test_that("read_resource() returns error on incorrect Data Package", {
   expect_error(
     read_resource(list(), "deployments"),