From 8878112024694e9e0ede6df37273c88f05360382 Mon Sep 17 00:00:00 2001 From: chainsawriot Date: Thu, 1 Dec 2022 12:27:56 +0100 Subject: [PATCH] add `sep` parameter to `readtext` --- DESCRIPTION | 2 +- R/readtext.R | 9 +++++---- man/readtext.Rd | 3 +++ 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index c1800fc..6d709c0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -41,5 +41,5 @@ Encoding: UTF-8 BugReports: https://github.com/quanteda/readtext/issues LazyData: TRUE VignetteBuilder: knitr -RoxygenNote: 7.1.1 +RoxygenNote: 7.2.1 Roxygen: list(markdown = TRUE) diff --git a/R/readtext.R b/R/readtext.R index de5069f..83ba197 100644 --- a/R/readtext.R +++ b/R/readtext.R @@ -91,6 +91,7 @@ #' \item 2: output a brief summary message #' \item 3: output detailed file-related messages #' } +#' @param sep field separator used when reading csv files; defaults to "," #' @param ... additional arguments passed through to low-level file reading #' function, such as [file()], [fread()], etc. Useful #' for specifying an input encoding option, which is specified in the same was @@ -156,7 +157,7 @@ readtext <- function(file, ignore_missing_files = FALSE, text_field = NULL, docid_field = NULL, docvarsfrom = c("metadata", "filenames", "filepaths"), dvsep = "_", docvarnames = NULL, encoding = NULL, source = NULL, cache = TRUE, - verbosity = readtext_options("verbosity"), + sep = ",", verbosity = readtext_options("verbosity"), ...) { args <- list(...) @@ -205,7 +206,7 @@ readtext <- function(file, ignore_missing_files = FALSE, text_field = NULL, sources <- mapply(function(x, e) { get_source(x, text_field = text_field, docid_field = docid_field, - encoding = e, source = source, verbosity = verbosity, ...) + encoding = e, source = source, verbosity = verbosity, sep = sep, ...) 
}, files, encoding, SIMPLIFY = FALSE) # combine all of the data.frames returned @@ -241,7 +242,7 @@ readtext <- function(file, ignore_missing_files = FALSE, text_field = NULL, ## Read each file as appropriate, calling the get_* functions for recognized ## file types -get_source <- function(path, text_field, docid_field, replace_specialchar = FALSE, verbosity = 1, ..., +get_source <- function(path, text_field, docid_field, replace_specialchar = FALSE, verbosity = 1, sep, ..., # deprecated arguments textfield) { @@ -264,7 +265,7 @@ get_source <- function(path, text_field, docid_field, replace_specialchar = FALS result <- switch(ext, txt = get_txt(path, ...), - csv = get_csv(path, text_field, docid_field, sep = ",", ...), + csv = get_csv(path, text_field, docid_field, sep = sep, ...), tsv = get_csv(path, text_field, docid_field, sep = "\t", ...), tab = get_csv(path, text_field, docid_field, sep = "\t", ...), json = get_json(path, text_field, docid_field, verbosity = verbosity, ...), diff --git a/man/readtext.Rd b/man/readtext.Rd index 1317729..ad9823a 100644 --- a/man/readtext.Rd +++ b/man/readtext.Rd @@ -15,6 +15,7 @@ readtext( encoding = NULL, source = NULL, cache = TRUE, + sep = ",", verbosity = readtext_options("verbosity"), ... ) @@ -109,6 +110,8 @@ as JSON or HTML. Currently supported types are \code{"twitter"} for JSON and \item{cache}{if \code{TRUE}, save remote file to a temporary folder. Only used when \code{file} is a URL.} +\item{sep}{field separator used when reading csv files; defaults to ","} + \item{verbosity}{\itemize{ \item 0: output errors only \item 1: output errors and warnings (default)