-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
The main entry point is the `csv_read()` function, all it does is create a `csv::TableReader` with the `csv_table_reader()` generic and then `$Read()` from it. as in the apache#2947 for feather format, `csv_table_reader` is generic with the methods: - arrow::io::InputStream: calls the TableReader actor with the other options - character and fs_path: depending on the `mmap` option (TRUE by default) it opens the file with `mmap_open()` of `file_open()` and then calls the other method. ``` r library(arrow) tf <- tempfile() readr::write_csv(iris, tf) tab1 <- csv_read(tf) tab1 #> arrow::Table as_tibble(tab1) #> # A tibble: 150 x 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> <dbl> <dbl> <dbl> <dbl> <chr> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # … with 140 more rows ``` <sup>Created on 2018-11-13 by the [reprex package](https://reprex.tidyverse.org) (v0.2.1.9000)</sup> Author: Romain Francois <romain@purrple.cat> Closes apache#2949 from romainfrancois/ARROW-3760/csv_reader and squashes the following commits: 951e9f5 <Romain Francois> s/csv_read/read_csv_arrow/ 7770ec5 <Romain Francois> not using readr:: at this point bb13a76 <Romain Francois> rebase 83b5162 <Romain Francois> s/file_open/ReadableFile/ 959020c <Romain Francois> No need to special use mmap for file path method 6e74003 <Romain Francois> going through CharacterVector makes sure this is a character vector 2585501 <Romain Francois> line breaks for readability 0ab8397 <Romain Francois> linting 09187e6 <Romain Francois> Expose arrow::csv::TableReader, functions csv_table_reader() + csv_read()
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -55,6 +55,7 @@ Collate: | |
'array.R' | ||
'buffer.R' | ||
'compute.R' | ||
'csv.R' | ||
'dictionary.R' | ||
'feather.R' | ||
'io.R' | ||
|
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,182 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
|
||
#' @include R6.R | ||
|
||
`arrow::csv::TableReader` <- R6Class("arrow::csv::TableReader", inherit = `arrow::Object`, | ||
public = list( | ||
Read = function() shared_ptr(`arrow::Table`, csv___TableReader__Read(self)) | ||
) | ||
) | ||
|
||
`arrow::csv::ReadOptions` <- R6Class("arrow::csv::ReadOptions", inherit = `arrow::Object`) | ||
`arrow::csv::ParseOptions` <- R6Class("arrow::csv::ParseOptions", inherit = `arrow::Object`) | ||
`arrow::csv::ConvertOptions` <- R6Class("arrow::csv::ConvertOptions", inherit = `arrow::Object`) | ||
|
||
#' read options for the csv reader | ||
#' | ||
#' @param use_threads Whether to use the global CPU thread pool | ||
#' @param block_size Block size we request from the IO layer; also determines the size of chunks when use_threads is `TRUE` | ||
#' | ||
#' @export | ||
csv_read_options <- function(use_threads = TRUE, block_size = 1048576L) { | ||
shared_ptr(`arrow::csv::ReadOptions`, csv___ReadOptions__initialize( | ||
list( | ||
use_threads = use_threads, | ||
block_size = block_size | ||
) | ||
)) | ||
} | ||
|
||
#' Parsing options | ||
#' | ||
#' @param delimiter Field delimiter | ||
#' @param quoting Whether quoting is used | ||
#' @param quote_char Quoting character (if `quoting` is `TRUE`) | ||
#' @param double_quote Whether a quote inside a value is double-quoted | ||
#' @param escaping Whether escaping is used | ||
#' @param escape_char Escaping character (if `escaping` is `TRUE`) | ||
#' @param newlines_in_values Whether values are allowed to contain CR (`0x0d``) and LF (`0x0a``) characters | ||
#' @param ignore_empty_lines Whether empty lines are ignored. If false, an empty line represents | ||
#' @param header_rows Number of header rows to skip (including the first row containing column names) | ||
#' | ||
#' @export | ||
csv_parse_options <- function( | ||
delimiter = ",", quoting = TRUE, quote_char = '"', | ||
double_quote = TRUE, escaping = FALSE, escape_char = '\\', | ||
newlines_in_values = FALSE, ignore_empty_lines = TRUE, | ||
header_rows = 1L | ||
){ | ||
shared_ptr(`arrow::csv::ParseOptions`, csv___ParseOptions__initialize( | ||
list( | ||
delimiter = delimiter, | ||
quoting = quoting, | ||
quote_char = quote_char, | ||
double_quote = double_quote, | ||
escaping = escaping, | ||
escape_char = escape_char, | ||
newlines_in_values = newlines_in_values, | ||
ignore_empty_lines = ignore_empty_lines, | ||
header_rows = header_rows | ||
) | ||
)) | ||
} | ||
|
||
#' Conversion Options for the csv reader | ||
#' | ||
#' @param check_utf8 Whether to check UTF8 validity of string columns | ||
#' | ||
#' @export | ||
csv_convert_options <- function(check_utf8 = TRUE){ | ||
shared_ptr(`arrow::csv::ConvertOptions`, csv___ConvertOptions__initialize( | ||
list( | ||
check_utf8 = check_utf8 | ||
) | ||
)) | ||
} | ||
|
||
#' CSV table reader | ||
#' | ||
#' @param file file | ||
#' @param read_options, see [csv_read_options()] | ||
#' @param parse_options, see [csv_parse_options()] | ||
#' @param convert_options, see [csv_convert_options()] | ||
#' @param ... additional parameters. | ||
#' | ||
#' @export | ||
csv_table_reader <- function(file, | ||
read_options = csv_read_options(), | ||
parse_options = csv_parse_options(), | ||
convert_options = csv_convert_options(), | ||
... | ||
){ | ||
UseMethod("csv_table_reader") | ||
} | ||
|
||
#' @importFrom rlang abort | ||
#' @export | ||
csv_table_reader.default <- function(file, | ||
read_options = csv_read_options(), | ||
parse_options = csv_parse_options(), | ||
convert_options = csv_convert_options(), | ||
... | ||
) { | ||
abort("unsupported") | ||
} | ||
|
||
#' @export | ||
`csv_table_reader.character` <- function(file, | ||
read_options = csv_read_options(), | ||
parse_options = csv_parse_options(), | ||
convert_options = csv_convert_options(), | ||
... | ||
){ | ||
csv_table_reader(fs::path_abs(file), | ||
read_options = read_options, | ||
parse_options = parse_options, | ||
convert_options = convert_options, | ||
... | ||
) | ||
} | ||
|
||
#' @export | ||
`csv_table_reader.fs_path` <- function(file, | ||
read_options = csv_read_options(), | ||
parse_options = csv_parse_options(), | ||
convert_options = csv_convert_options(), | ||
... | ||
){ | ||
csv_table_reader(ReadableFile(file), | ||
read_options = read_options, | ||
parse_options = parse_options, | ||
convert_options = convert_options, | ||
... | ||
) | ||
} | ||
|
||
#' @export | ||
`csv_table_reader.arrow::io::InputStream` <- function(file, | ||
read_options = csv_read_options(), | ||
parse_options = csv_parse_options(), | ||
convert_options = csv_convert_options(), | ||
... | ||
){ | ||
shared_ptr(`arrow::csv::TableReader`, | ||
csv___TableReader__Make(file, read_options, parse_options, convert_options) | ||
) | ||
} | ||
|
||
#' @export | ||
`csv_table_reader.arrow::csv::TableReader` <- function(file, | ||
read_options = csv_read_options(), | ||
parse_options = csv_parse_options(), | ||
convert_options = csv_convert_options(), | ||
... | ||
){ | ||
file | ||
} | ||
|
||
#' Read csv file into an arrow::Table | ||
#' | ||
#' Use arrow::csv::TableReader from [csv_table_reader()] | ||
#' | ||
#' @param ... Used to construct an arrow::csv::TableReader | ||
#' @export | ||
read_csv_arrow <- function(...) { | ||
csv_table_reader(...)$Read() | ||
} | ||
|
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.