From 8878112024694e9e0ede6df37273c88f05360382 Mon Sep 17 00:00:00 2001
From: chainsawriot <chainsawtiney@gmail.com>
Date: Thu, 1 Dec 2022 12:27:56 +0100
Subject: [PATCH] add `sep` parameter to `readtext`

---
 DESCRIPTION     | 2 +-
 R/readtext.R    | 9 +++++----
 man/readtext.Rd | 3 +++
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index c1800fc..6d709c0 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -41,5 +41,5 @@ Encoding: UTF-8
 BugReports: https://github.com/quanteda/readtext/issues
 LazyData: TRUE
 VignetteBuilder: knitr
-RoxygenNote: 7.1.1
+RoxygenNote: 7.2.1
 Roxygen: list(markdown = TRUE)
diff --git a/R/readtext.R b/R/readtext.R
index de5069f..83ba197 100644
--- a/R/readtext.R
+++ b/R/readtext.R
@@ -91,6 +91,7 @@
 #'   \item 2: output a brief summary message
 #'   \item 3: output detailed file-related messages
 #' }
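+#' @param sep field separator used when reading `.csv` files; defaults to `","`. Files with a `.tsv` or `.tab` extension are always read with a tab separator.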
 #' @param ... additional arguments passed through to low-level file reading 
 #'   function, such as [file()], [fread()], etc.  Useful 
 #'   for specifying an input encoding option, which is specified in the same was
@@ -156,7 +157,7 @@ readtext <- function(file, ignore_missing_files = FALSE, text_field = NULL,
                     docid_field = NULL,
                     docvarsfrom = c("metadata", "filenames", "filepaths"), dvsep = "_",
                     docvarnames = NULL, encoding = NULL, source = NULL, cache = TRUE,
-                    verbosity = readtext_options("verbosity"),
+                    sep = ",", verbosity = readtext_options("verbosity"),
                     ...) {
 
     args <- list(...)
@@ -205,7 +206,7 @@ readtext <- function(file, ignore_missing_files = FALSE, text_field = NULL,
     
     sources <- mapply(function(x, e) {
         get_source(x, text_field = text_field, docid_field = docid_field, 
-                   encoding = e, source = source, verbosity = verbosity, ...)
+                   encoding = e, source = source, verbosity = verbosity, sep = sep, ...)
     }, files, encoding, SIMPLIFY = FALSE)
 
     # combine all of the data.frames returned
@@ -241,7 +242,7 @@ readtext <- function(file, ignore_missing_files = FALSE, text_field = NULL,
 
 ## Read each file as appropriate, calling the get_* functions for recognized
 ## file types
-get_source <- function(path, text_field, docid_field, replace_specialchar = FALSE, verbosity = 1, ...,
+get_source <- function(path, text_field, docid_field, replace_specialchar = FALSE, verbosity = 1, sep, ...,
                        # deprecated arguments
                        textfield) {
 
@@ -264,7 +265,7 @@ get_source <- function(path, text_field, docid_field, replace_specialchar = FALS
 
     result <- switch(ext,
                txt = get_txt(path, ...),
-               csv = get_csv(path, text_field, docid_field, sep = ",", ...),
+               csv = get_csv(path, text_field, docid_field, sep = sep, ...),
                tsv = get_csv(path, text_field, docid_field, sep = "\t", ...),
                tab = get_csv(path, text_field, docid_field, sep = "\t", ...),
                json = get_json(path, text_field, docid_field, verbosity = verbosity, ...),
diff --git a/man/readtext.Rd b/man/readtext.Rd
index 1317729..ad9823a 100644
--- a/man/readtext.Rd
+++ b/man/readtext.Rd
@@ -15,6 +15,7 @@ readtext(
   encoding = NULL,
   source = NULL,
   cache = TRUE,
+  sep = ",",
   verbosity = readtext_options("verbosity"),
   ...
 )
@@ -109,6 +110,8 @@ as JSON or HTML. Currently supported types are \code{"twitter"} for JSON and
 \item{cache}{if \code{TRUE}, save remote file to a temporary folder. Only used
 when \code{file} is a URL.}
 
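+\item{sep}{field separator used when reading \code{.csv} files; defaults to \code{","}. Files with a \code{.tsv} or \code{.tab} extension are always read with a tab separator.}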
+
 \item{verbosity}{\itemize{
 \item 0: output errors only
 \item 1: output errors and warnings (default)