Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support VFS-based read and write connections #710

Merged
merged 11 commits into from
May 28, 2024
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,7 @@ export(tiledb_vfs_unserialize)
export(tiledb_vfs_write)
export(toMatrix)
export(toSparseMatrix)
export(vfs_file)
exportClasses(tiledb_array)
exportClasses(tiledb_array_schema)
exportClasses(tiledb_array_schema_evolution)
Expand Down
11 changes: 10 additions & 1 deletion R/Init.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# MIT License
#
# Copyright (c) 2017-2023 TileDB Inc.
# Copyright (c) 2017-2024 TileDB Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -55,6 +55,15 @@

## call setter for Rcpp plugin support
.set_compile_link_options()

lib_path <- system.file("lib", .Platform$r_arch, paste0("libconnection", .Platform$dynlib.ext), package = "tiledb")
res <- dyn.load(lib_path)
.Call(`_tiledb_tldb_init_`, res$new_connection$address, PACKAGE="tiledb")
}

# Namespace unload hook: release the helper 'libconnection' shared library
# that is looked up under the installed package's 'lib' directory.
#
# The argument is supplied by R when the namespace is unloaded; it is not used
# here because the library location is resolved through system.file().
.onUnload <- function(libname) {
  lib_path <- system.file(
    "lib", .Platform$r_arch,
    paste0("libconnection", .Platform$dynlib.ext),
    package = "tiledb"
  )
  # system.file() yields "" when the file cannot be found; dyn.unload("")
  # would then signal an error in the middle of unloading, so skip it.
  if (nzchar(lib_path)) {
    dyn.unload(lib_path)
  }
  invisible(NULL)
}

.onAttach <- function(libname, pkgname) {
Expand Down
37 changes: 37 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -1184,3 +1184,40 @@ vlcbuf_from_shmem <- function(datapath, dtype) {
.Call(`_tiledb_vlcbuf_from_shmem`, datapath, dtype)
}

#' Create a custom file connection
#'
#' @details
#' This \code{vfs_file()} connection works like the \code{file()} connection in R itself.
#'
#' This connection works with both ASCII and binary data, e.g. using
#' \code{readLines()} and \code{readBin()}.
#'
#' @param description path to a filename; contrary to \code{rconnection} a connection
#' object is not supported.
#' @param mode character string. A description of how to open the connection if
#' it is to be opened upon creation e.g. "rb". Default "" (empty string) means
#' to not open the connection on creation - user must still call \code{open()}.
#' Note: If a \code{mode} string is provided, the user must still call \code{close()}
#' otherwise the contents of the file aren't completely flushed until the
#' connection is garbage collected.
#' @param verbosity integer value 0, 1, or 2. Default: 0.
#' Set to \code{0} for no debugging messages, \code{1} for some high-level messages
#' and \code{verbosity = 2} for all debugging messages.
#'
#' @return A connection object that can be passed to functions such as
#' \code{readBin()}, \code{writeBin()}, \code{readLines()} and \code{writeLines()}.
#'
#' @export
#'
#' @examples
#' \dontrun{
#' tmp <- tempfile()
#' dat <- as.raw(1:255)
#' writeBin(dat, vfs_file(tmp))
#' readBin(vfs_file(tmp), raw(), 1000)
#' }
vfs_file <- function(description, mode = "", verbosity = 0L) {
  # Thin wrapper: all work happens in the registered native routine.
  .Call(`_tiledb_vfs_file`, description, mode, verbosity)
}

# Pass the external pointer 'nc_xptr' to the native initialisation routine and
# return its result without printing it.
tldb_init_ <- function(nc_xptr) {
  init_result <- .Call(`_tiledb_tldb_init_`, nc_xptr)
  invisible(init_result)
}

4 changes: 2 additions & 2 deletions cleanup
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/bin/sh

rm -f src/Makevars src/*.o src/*.so config.log config.status inst/tiledb-*.tar.gz
rm -rf tiledb.Rcheck autom4te.cache inst/tiledb/ inst/config.log inst/config.status tiledb/
rm -f src/Makevars src/*.o src/*.so config.log config.status inst/tiledb-*.tar.gz src/connection/*.o
rm -rf tiledb.Rcheck autom4te.cache inst/tiledb/ inst/config.log inst/config.status tiledb/ inst/lib/
8 changes: 8 additions & 0 deletions inst/examples/ex_s3_connection.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@

library(tiledb)

# Location of the CSV object to read
csv_uri <- "s3://tiledb-dirk/tempfolder/penguins.csv"

# Wrap the URI in a vfs_file() connection and let read.csv() consume it
csv_con <- vfs_file(csv_uri)
penguins <- read.csv(csv_con)
summary(penguins)

## write.csv also works via vfs_file(...)
8 changes: 8 additions & 0 deletions inst/examples/ex_s3_serialize.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@

library(tiledb)

# Destination URI for the serialized object
target_uri <- "s3://tiledb-dirk/serializeDemo"

# Serialize the penguins data set to the URI ...
penguins <- palmerpenguins::penguins
tiledb_vfs_serialize(penguins, target_uri)

# ... and read it back again
restored <- tiledb_vfs_unserialize(target_uri)
head(restored)
13 changes: 13 additions & 0 deletions inst/tinytest/test_vfs_file.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
library(tinytest)

# Binary round trip: write raw bytes through a vfs_file() connection and
# read them back (asking readBin() for up to 1000 values).
bin_path <- tempfile()
bin_expected <- as.raw(1:255)
writeBin(bin_expected, tiledb:::vfs_file(bin_path))
bin_actual <- readBin(tiledb:::vfs_file(bin_path), raw(), 1000)
expect_identical(bin_actual, bin_expected)

# Text round trip: write a character vector derived from mtcars line by line
# and read the lines back.
txt_path <- tempfile()
txt_expected <- as.character(mtcars)
writeLines(txt_expected, tiledb:::vfs_file(txt_path))
txt_actual <- readLines(tiledb:::vfs_file(txt_path))
expect_identical(txt_actual, txt_expected)
40 changes: 40 additions & 0 deletions man/vfs_file.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 15 additions & 2 deletions src/Makevars.in
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,25 @@ PKG_CPPFLAGS = -I. -I../inst/include/ @CXX17_MACOS@ @TILEDB_INCLUDE@ @TILEDB_SIL
## We also need the TileDB library
PKG_LIBS = @CXX17_MACOS@ @TILEDB_LIBS@ @TILEDB_RPATH@

all: $(SHLIB)
LIB_CON_DIR = ../inst/lib$(R_ARCH)
LIB_CON = $(LIB_CON_DIR)/libconnection.so

all: $(OBJECTS) $(LIB_CON) $(SHLIB)
# if we are
# - on macOS aka Darwin which needs this
# - the library is present (implying non-system library use)
# then let us call install_name_tool
@if [ `uname -s` = 'Darwin' ] && [ -f ../inst/tiledb/lib/libtiledb.dylib ] && [ -f tiledb.so ]; then \
@if [ `uname -s` = 'Darwin' ]; then \
install_name_tool -change libz.1.dylib @rpath/libz.1.dylib ../inst/tiledb/lib/libtiledb.dylib; \
install_name_tool -add_rpath @loader_path/../tiledb/lib tiledb.so; \
install_name_tool -add_rpath @loader_path/../tiledb/lib $(LIB_CON); \
fi

# Build the stand-alone connection helper library from connection/connection.o.
# The output directory $(LIB_CON_DIR) is created first; in the link command
# $@ is the target ($(LIB_CON)) and $^ the list of prerequisites.
# NOTE(review): SHLIB_LINK, SHLIB_LIBADD and LIBR are not assigned in the
# visible part of this file -- presumably provided by R's build configuration.
$(LIB_CON): connection/connection.o
@mkdir -p $(LIB_CON_DIR)
@$(SHLIB_LINK) $(SHLIB_LIBADD) $(LIBR) -o $@ $^

clean:
rm -f $(SHLIB) $(OBJECTS) $(LIB_CON) connection/connection.o

.PHONY: all clean
12 changes: 10 additions & 2 deletions src/Makevars.win
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,18 @@ PKG_LIBS = \
-lBCrypt -lKernel32 -lRpcrt4 -lWininet -lWinhttp -lWs2_32 -lShlwapi -lUserenv -lversion -lws2_32 \
-lsharpyuv

all: clean winlibs
LIB_CON = ../inst/lib$(R_ARCH)/libconnection.dll

all: winlibs $(OBJECTS) $(SHLIB) $(LIB_CON)

# Build the connection helper DLL from connection/connection.o.
# The directory part of $(LIB_CON) is created first; in the link command
# $@ is the target ($(LIB_CON)) and $^ the list of prerequisites.
# NOTE(review): SHLIB_LINK and LIBR are not assigned in the visible part of
# this file -- presumably provided by R's build configuration.
$(LIB_CON): connection/connection.o
mkdir -p $(dir $(LIB_CON))
$(SHLIB_LINK) $(LIBR) -o $@ $^

winlibs:
"$(R_HOME)/bin$(R_ARCH_BIN)/Rscript.exe" "../tools/winlibs.R"

clean:
rm -f $(SHLIB) $(OBJECTS)
rm -f $(SHLIB) $(OBJECTS) $(LIB_CON) connection/connection.o

.PHONY: all clean
25 changes: 25 additions & 0 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3537,6 +3537,29 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// vfs_file
// Declaration of the C++ implementation; its definition is not in the visible
// part of this file.
SEXP vfs_file(std::string description, std::string mode, int verbosity);
// .Call entry point for vfs_file(): converts the three SEXP arguments to C++
// values, calls vfs_file() and returns the wrapped result to R.
RcppExport SEXP _tiledb_vfs_file(SEXP descriptionSEXP, SEXP modeSEXP, SEXP verbositySEXP) {
// BEGIN_RCPP / END_RCPP are Rcpp macros bracketing the wrapper body.
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
// Argument conversion: two strings and one int, in declaration order.
Rcpp::traits::input_parameter< std::string >::type description(descriptionSEXP);
Rcpp::traits::input_parameter< std::string >::type mode(modeSEXP);
Rcpp::traits::input_parameter< int >::type verbosity(verbositySEXP);
// Call the implementation and wrap its SEXP result for return.
rcpp_result_gen = Rcpp::wrap(vfs_file(description, mode, verbosity));
return rcpp_result_gen;
END_RCPP
}
// tldb_init_
// Declaration of the C++ implementation; its definition is not in the visible
// part of this file.
void tldb_init_(SEXP nc_xptr);
// .Call entry point for tldb_init_(): forwards the single SEXP argument to the
// implementation. The implementation returns void, so R receives NULL.
RcppExport SEXP _tiledb_tldb_init_(SEXP nc_xptrSEXP) {
// BEGIN_RCPP / END_RCPP are Rcpp macros bracketing the wrapper body.
BEGIN_RCPP
Rcpp::RNGScope rcpp_rngScope_gen;
// The argument is passed through as a SEXP without type conversion.
Rcpp::traits::input_parameter< SEXP >::type nc_xptr(nc_xptrSEXP);
tldb_init_(nc_xptr);
return R_NilValue;
END_RCPP
}

static const R_CallMethodDef CallEntries[] = {
{"_tiledb_libtiledb_query_export_buffer", (DL_FUNC) &_tiledb_libtiledb_query_export_buffer, 3},
Expand Down Expand Up @@ -3832,6 +3855,8 @@ static const R_CallMethodDef CallEntries[] = {
{"_tiledb_vlcbuf_to_shmem", (DL_FUNC) &_tiledb_vlcbuf_to_shmem, 4},
{"_tiledb_querybuf_from_shmem", (DL_FUNC) &_tiledb_querybuf_from_shmem, 2},
{"_tiledb_vlcbuf_from_shmem", (DL_FUNC) &_tiledb_vlcbuf_from_shmem, 2},
{"_tiledb_vfs_file", (DL_FUNC) &_tiledb_vfs_file, 3},
{"_tiledb_tldb_init_", (DL_FUNC) &_tiledb_tldb_init_, 1},
{NULL, NULL, 0}
};

Expand Down
11 changes: 11 additions & 0 deletions src/connection/connection.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@

// borrowed from https://github.com/r-lib/archive/blob/aaf6341b674b974382574c16723b0a46f7ccd50c/src/connection/connection.c

#include "connection.h"

/*
 * Create a custom R connection.
 *
 * All four arguments are handed unchanged to R_new_custom_connection() and
 * its result is returned to the caller.
 */
SEXP new_connection(const char* description,
                    const char* mode,
                    const char* class_name,
                    Rconnection* ptr) {
  SEXP connection = R_new_custom_connection(description, mode, class_name, ptr);
  return connection;
}
35 changes: 35 additions & 0 deletions src/connection/connection.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// borrowed from https://github.com/r-lib/archive/blob/aaf6341b674b974382574c16723b0a46f7ccd50c/src/connection/connection.h

#pragma once

#include "Rinternals.h"

// While <R_ext/Connections.h> is being included, the identifiers `class` and
// `private` are temporarily renamed to `class_name` and `private_ptr`, and
// restored right afterwards. Under clang the "-Wkeyword-macro" diagnostic is
// silenced for the duration of that renaming.
// NOTE(review): presumably needed because the R header uses these C++ keywords
// as identifiers, which would not compile when included from C++ -- confirm.
// clang-format off
#ifdef __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wkeyword-macro"
#endif
#define class class_name
#define private private_ptr
#include <R_ext/Connections.h>
#undef class
#undef private
#ifdef __clang__
# pragma clang diagnostic pop
#endif
// clang-format on

// C linkage so that the functions below can be called from C++ code.
#ifdef __cplusplus
extern "C" {
#endif

// Create a custom connection from a description, an open mode, a class name
// and a pointer that receives the Rconnection.
SEXP new_connection(const char* description,
const char* mode,
const char* class_name,
Rconnection* ptr);

// NOTE(review): only the declaration is visible here; confirm that a matching
// definition exists and that this prototype is still needed.
size_t read_connection(SEXP connection, void* buf, size_t n);

#ifdef __cplusplus
}
#endif
Loading
Loading