Skip to content

Commit

Permalink
Fix additional S3 support issues
Browse files Browse the repository at this point in the history
re: Unidata#2117
re: Unidata#2119

* Modify libsrc to allow byte-range reading of netcdf-3 files in private S3 buckets; this required using the aws sdk. Also add a test case.
* The aws sdk can sometimes cause problems if the Aws::ShutdownAPI function is not called. So add optional atexit() support to ensure it is called. This is disabled for Windows.
* Add documentation to nczarr.md on how to build and use the aws sdk under windows. Currently it builds, but testing fails.
* Switch testing from stratus to the Unidata bucket on S3.
* Improve support for the s3: url protocol.
* Add a s3 specific utility code file: ds3util.c
* Modify NC_infermodel to attempt to read the magic number of byte-ranged files in S3.

## Misc.

* Move and rename the core S3 SDK wrapper code (libnczarr/zs3sdk.cpp) to libdispatch since it now used in libsrc as well as libnczarr.
* Add calls to nc_finalize in the utilities in case atexit is disabled.
* Add header only json parser to the distribution rather than as a built source.
  • Loading branch information
DennisHeimbigner committed Oct 30, 2021
1 parent 228e7f5 commit f6e25b6
Show file tree
Hide file tree
Showing 59 changed files with 2,329 additions and 636 deletions.
17 changes: 13 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1049,7 +1049,6 @@ IF(!MSVC)
#FIND_LIBRARY(SZIP PATH NAMES szip sz)
SET(SZIP_LIBRARY ${SZIP})
ENDIF()
message("xxx: ${SZIP_FOUND} ; ${SZIP}")
# Define a test flag for have szip library
IF(SZIP_FOUND)
INCLUDE_DIRECTORIES(${SZIP_INCLUDE_DIRS})
Expand Down Expand Up @@ -1133,10 +1132,10 @@ ENDIF()
# See if aws-s3-sdk is available
# But only if enabled
IF(ENABLE_NCZARR_S3)
find_package(AWSSDK REQUIRED)
set(SERVICE s3)
AWSSDK_DETERMINE_LIBS_TO_LINK(SERVICE AWSSDK_LINK_LIBRARIES)
find_package(AWSSDK REQUIRED COMPONENTS s3;core)
IF(AWSSDK_FOUND)
SET(service s3;core)
AWSSDK_DETERMINE_LIBS_TO_LINK(service AWS_LINK_LIBRARIES)
SET(ENABLE_S3_SDK ON CACHE BOOL "S3 SDK" FORCE)
ELSE()
SET(ENABLE_S3_SDK OFF CACHE BOOL "S3 SDK" FORCE)
Expand Down Expand Up @@ -1712,7 +1711,17 @@ CHECK_FUNCTION_EXISTS(fileno HAVE_FILENO)

CHECK_FUNCTION_EXISTS(clock_gettime HAVE_CLOCK_GETTIME)
CHECK_SYMBOL_EXISTS("struct timespec" "time.h" HAVE_STRUCT_TIMESPEC)
CHECK_FUNCTION_EXISTS(atexit HAVE_ATEXIT)

# Control invoking nc_finalize at exit.
# If atexit() is unavailable (HAVE_ATEXIT is false), the option cannot be
# honored, so force it off and warn; a single condition suffices — the
# original nested IF(NOT HAVE_ATEXIT) tests were redundant.
OPTION(ENABLE_ATEXIT_FINALIZE "Invoke nc_finalize at exit." ON)
IF(ENABLE_ATEXIT_FINALIZE AND NOT HAVE_ATEXIT)
  SET(ENABLE_ATEXIT_FINALIZE OFF CACHE BOOL "Enable ATEXIT" FORCE)
  MESSAGE(WARNING "ENABLE_ATEXIT_FINALIZE set but atexit() function not defined")
ENDIF()

# Check to see if MAP_ANONYMOUS is defined.
IF(MSVC)
MESSAGE(WARNING "mmap not supported under visual studio: disabling MMAP support.")
Expand Down
1 change: 1 addition & 0 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ This file contains a high-level description of this package's evolution. Release

## 4.8.2 - TBD

* [Enhancement] Support byte-range reading of netcdf-3 files stored in private buckets in S3. See [Github #2???](https://github.com/Unidata/netcdf-c/pull/2???)
* [Enhancement] Support Amazon S3 access for NCZarr. Also support use of the existing Amazon SDK credentials system. See [Github #2114](https://github.com/Unidata/netcdf-c/pull/2114)
* [Bug Fix] Fix string allocation error in H5FDhttp.c. See [Github #2127](https://github.com/Unidata/netcdf-c/pull/2127).
* [Bug Fix] Apply patches for ezxml and for selected oss-fuzz detected errors. See [Github #2125](https://github.com/Unidata/netcdf-c/pull/2125).
Expand Down
9 changes: 7 additions & 2 deletions config.h.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,9 @@ are set when opening a binary file on Windows. */
/* set this only when building a DLL under MinGW */
#cmakedefine DLL_NETCDF 1

/* if true, invoke nc_finalize via atexit() */
#cmakedefine ENABLE_ATEXIT_FINALIZE 1

/* if true, build byte-range Client */
#cmakedefine ENABLE_BYTERANGE 1

Expand Down Expand Up @@ -183,10 +186,12 @@ are set when opening a binary file on Windows. */
/* Define to 1 if you have `alloca', as a function or macro. */
#cmakedefine HAVE_ALLOCA 1

/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix).
*/
/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix). */
#cmakedefine HAVE_ALLOCA_H 1

/* Define to 1 if you have the `atexit' function. */
#cmakedefine HAVE_ATEXIT 1

/* Define to 1 if you have hdf5_coll_metadata_ops */
#cmakedefine HDF5_HAS_COLL_METADATA_OPS 1

Expand Down
23 changes: 23 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -1113,6 +1113,29 @@ if test "x$enable_byterange" = xyes; then
AC_DEFINE([ENABLE_BYTERANGE], [1], [if true, support byte-range read of remote datasets.])
fi

# Does the user want to disable atexit?
AC_MSG_CHECKING([whether nc_finalize should be invoked at exit])
AC_ARG_ENABLE([atexit-finalize],
[AS_HELP_STRING([--disable-atexit-finalize],
[disable invoking nc_finalize at exit])])
test "x$enable_atexit_finalize" = xno || enable_atexit_finalize=yes
AC_MSG_RESULT($enable_atexit_finalize)

# Check for atexit
AC_CHECK_FUNCS([atexit])

# If no atexit, then disable atexit finalize.
# NOTE: AC_CHECK_FUNCS([atexit]) caches its result in ac_cv_func_atexit
# (the original tested the nonexistent ac_cv_func_function, so this guard
# could never fire). Since atexit-finalize defaults to on, warn and disable
# rather than abort — matching the CMake build's behavior.
if test "x$enable_atexit_finalize" = xyes ; then
  if test "x$ac_cv_func_atexit" = xno ; then
    enable_atexit_finalize=no
    AC_MSG_WARN([atexit() not available: disabling atexit-finalize.])
  fi
fi

if test "x$enable_atexit_finalize" = xyes ; then
AC_DEFINE([ENABLE_ATEXIT_FINALIZE], [1], [If true, enable nc_finalize via atexit()])
fi

# Need libdl(d) for plugins
AC_CHECK_LIB([dl],[dlopen],[have_libdld=yes],[have_libdld=no])
if test "x$have_libdld" = "xyes" ; then
Expand Down
109 changes: 85 additions & 24 deletions docs/nczarr.md
Original file line number Diff line number Diff line change
Expand Up @@ -554,24 +554,6 @@ The relevant tests for S3 support are in the _nczarr_test_ directory.
Currently, by default, testing of S3 with NCZarr is supported only for Unidata members of the NetCDF Development Group.
This is because it uses a Unidata-specific bucket that is inaccessible to the general user.
However, an untested mechanism exists by which others may be able to run the S3 specific tests.
If someone else wants to attempt these tests, then they need to define the following environment variables:
* NCZARR_S3_TEST_HOST=\<host\>
* NCZARR_S3_TEST_BUCKET=\<bucket-name\>
This assumes a Path Style address (see above) where
* host -- the complete host part of the url
* bucket -- a bucket in which testing can occur without fear of damaging anything.
_Example:_
````
NCZARR_S3_TEST_HOST=s3.us-west-1.amazonaws.com
NCZARR_S3_TEST_BUCKET=testbucket
````
If anyone tries to use this mechanism, it would be appreciated
if any difficulties were reported to Unidata as a Github issue.
# Appendix B. Building aws-sdk-cpp {#nczarr_s3sdk}
In order to use the S3 storage driver, it is necessary to install the Amazon [aws-sdk-cpp library](https://github.com/aws/aws-sdk-cpp.git).
Expand All @@ -580,19 +562,22 @@ Building this package from scratch has proven to be a formidable task.
This appears to be due to dependencies on very specific versions of,
for example, openssl.
However, the following context does work. Of course your mileage may vary.
## **nix** Build
For linux, the following context works. Of course your mileage may vary.
* OS: ubuntu 21
* aws-sdk-cpp version 1.9.96 or later?
* aws-sdk-cpp version 1.9.96 (or later?)
* Required installed libraries: openssl, libcurl, cmake, ninja (ninja-build in apt)
The recipe used:
### AWS-SDK-CPP Build Recipe
````
git clone --recurse-submodules https://www.github.com/aws/aws-sdk-cpp
pushd aws-sdk-cpp
mkdir build
cd build
PREFIX=/usr/local
FLAGS="-DCMAKE_INSTALL_PREFIX=${PREFIX}
FLAGS="-DCMAKE_INSTALL_PREFIX=${PREFIX} \
-DCMAKE_INSTALL_LIBDIR=lib \
-DCMAKE_MODULE_PATH=${PREFIX}/lib/cmake \
-DCMAKE_POLICY_DEFAULT_CMP0075=NEW \
Expand All @@ -608,8 +593,84 @@ cd ..
popd
````
For Windows we do not yet have solution. If you successfully install
on Windows, please let us know how you did it.
### NetCDF Build
In order to build netcdf-c with S3 sdk support,
the following options must be specified for ./configure.
````
--enable-nczarr-s3
````
If you have access to the Unidata bucket on Amazon, then you can
also test S3 support with this option.
````
--enable-nczarr-s3-tests
````
## Windows build
It is possible to build and install aws-sdk-cpp. It is also possible
to build netcdf-c using cmake. Unfortunately, testing currently fails.
For Windows, the following context works. Of course your mileage may vary.
* OS: Windows 10 64-bit with Visual Studio community edition 2019.
* aws-sdk-cpp version 1.9.96 (or later?)
* Required installed libraries: openssl, libcurl, cmake
### AWS-SDK-CPP Build Recipe
This command-line build assumes one is using Cygwin or Mingw to provide
tools such as bash.
````
git clone --recurse-submodules https://www.github.com/aws/aws-sdk-cpp
pushd aws-sdk-cpp
mkdir build
cd build
CFG="Release"
PREFIX="c:/tools/aws-sdk-cpp"
FLAGS="-DCMAKE_INSTALL_PREFIX=${PREFIX} \
-DCMAKE_INSTALL_LIBDIR=lib \
-DCMAKE_MODULE_PATH=${PREFIX}/cmake \
-DCMAKE_POLICY_DEFAULT_CMP0075=NEW \
-DBUILD_ONLY=s3 \
-DENABLE_UNITY_BUILD=ON \
-DCMAKE_BUILD_TYPE=$CFG \
-DSIMPLE_INSTALL=ON"
rm -fr build
mkdir -p build
cd build
cmake -DCMAKE_BUILD_TYPE=${CFG} $FLAGS ..
cmake --build . --config ${CFG}
cmake --install . --config ${CFG}
cd ..
popd
````
Notice that the sdk is being installed in the directory "c:\tools\aws-sdk-cpp"
rather than the default location "c:\Program Files (x86)/aws-sdk-cpp-all"
This is because when using a command line, an install path that contains
blanks may not work.
### NetCDF CMake Build
Enabling S3 support is controlled by these two cmake options:
````
-DENABLE_NCZARR_S3=ON
-DENABLE_NCZARR_S3_TESTS=OFF
````
However, to find the aws sdk libraries,
the following environment variables must be set:
````
AWSSDK_ROOT_DIR="c:/tools/aws-sdk-cpp"
AWSSDKBIN="/cygdrive/c/tools/aws-sdk-cpp/bin"
PATH="$PATH:${AWSSDKBIN}"
````
Then the following options must be specified for cmake.
````
-DAWSSDK_ROOT_DIR=${AWSSDK_ROOT_DIR}
-DAWSSDK_DIR=${AWSSDK_ROOT_DIR}/lib/cmake/AWSSDK"
````
# Appendix C. Amazon S3 Imposed Limits {#nczarr_s3limits}
Expand Down
4 changes: 4 additions & 0 deletions include/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ IF(ENABLE_PNETCDF OR ENABLE_PARALLEL4)
COMPONENT headers)
ENDIF()

#INSTALL(FILES ${netCDF_BINARY_DIR}/include/netcdf_json.h
# DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
# COMPONENT headers)

FILE(GLOB CUR_EXTRA_DIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/*.h)
SET(CUR_EXTRA_DIST ${CUR_EXTRA_DIST} Makefile.am CMakeLists.txt)
ADD_EXTRA_DIST("${CUR_EXTRA_DIST}")
10 changes: 5 additions & 5 deletions include/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ nc4internal.h nctime.h nc3internal.h onstack.h ncrc.h ncauth.h \
ncoffsets.h nctestserver.h nc4dispatch.h nc3dispatch.h ncexternl.h \
ncpathmgr.h ncindex.h hdf4dispatch.h hdf5internal.h nc_provenance.h \
hdf5dispatch.h ncmodel.h isnan.h nccrc.h ncexhash.h ncxcache.h \
ncfilter.h ncjson.h ezxml.h
ncfilter.h ncjson.h ezxml.h ncs3sdk.h

if USE_DAP
noinst_HEADERS += ncdap.h
Expand All @@ -32,10 +32,10 @@ endif

EXTRA_DIST = CMakeLists.txt XGetopt.h netcdf_meta.h.in netcdf_dispatch.h.in

DISTCLEANFILES = netcdf_json.h

BUILT_SOURCES = netcdf_json.h
netcdf_json.h: Makefile ${srcdir}/ncjson.h ${srcdir}/../libdispatch/ncjson.c
# netcdf_json.h is part of the distribution.
# If either of the files ncjson.h ../libdispatch/ncjson.c is changed
# then netcdf_json.h should be reconstructed using this recipe.
build_netcdf_json.h::
sed -e 's/NCJSON_H/NETCDF_JSON_H/' -e '/ncjson.h/d' <${srcdir}/ncjson.h > $@
sed -e '/ncjson.h/d' < ${srcdir}/../libdispatch/ncjson.c >> $@

12 changes: 6 additions & 6 deletions include/hdf5internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,12 @@ struct NCauth;
/** Struct to hold HDF5-specific info for the file. */
typedef struct NC_HDF5_FILE_INFO {
hid_t hdfid;
#if defined(ENABLE_BYTERANGE) || defined(ENABLE_HDF5_ROS3) || defined(ENABLE_S3_SDK)
struct HTTP {
NCURI* uri; /* Parse of the incoming path, if url */
int iosp; /* We are using the S3 rawvirtual file driver */
struct NCauth* auth;
} http;
#if defined(ENABLE_BYTERANGE)
int byterange;
NCURI* uri; /* Parse of the incoming path, if url */
#if defined(ENABLE_HDF5_ROS3) || defined(ENABLE_S3_SDK)
struct NCauth* auth;
#endif
#endif
} NC_HDF5_FILE_INFO_T;

Expand Down
2 changes: 1 addition & 1 deletion include/nchttp.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ typedef struct NC_HTTP_STATE {

extern int nc_http_init(NC_HTTP_STATE** state);
extern int nc_http_init_verbose(NC_HTTP_STATE** state, int verbose);
extern int nc_http_size(NC_HTTP_STATE* state, const char* url, long long* sizep);
extern int nc_http_size(NC_HTTP_STATE* state, const char* url, long long unsigned* sizep);
extern int nc_http_read(NC_HTTP_STATE* state, const char* url, size64_t start, size64_t count, NCbytes* buf);
extern int nc_http_write(NC_HTTP_STATE* state, const char* url, NCbytes* payload);
extern int nc_http_close(NC_HTTP_STATE* state);
Expand Down
21 changes: 18 additions & 3 deletions include/ncrc.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,14 @@ struct AWSentry {
char* value;
};

typedef struct NCS3INFO {
char* host; /* non-null if other*/
char* region; /* region */
char* bucket; /* bucket name */
char* rootkey;
char* profile;
} NCS3INFO;

#if defined(__cplusplus)
extern "C" {
#endif
Expand Down Expand Up @@ -95,15 +103,22 @@ EXTERNL char* NC_entityescape(const char* s);
EXTERNL int NC_readfile(const char* filename, NCbytes* content);
EXTERNL int NC_writefile(const char* filename, size_t size, void* content);
EXTERNL char* NC_mktmp(const char* base);
EXTERNL int NC_getmodelist(const char* url, NClist** modelistp);
EXTERNL int NC_testmode(const char* path, const char* tag);
EXTERNL int NC_getmodelist(const char* modestr, NClist** modelistp);
EXTERNL int NC_testmode(NCURI* uri, const char* tag);
EXTERNL int NC_testpathmode(const char* path, const char* tag);
EXTERNL int NC_split_delim(const char* path, char delim, NClist* segments);
EXTERNL int NC_join(struct NClist* segments, char** pathp);

/* From ds3util.c */
/* S3 profiles */
EXTERNL int NC_s3urlrebuild(NCURI* url, NCURI** newurlp, char** bucketp, char** regionp);
EXTERNL int NC_getactives3profile(NCURI* uri, const char** profilep);
EXTERNL int NC_getdefaults3region(NCURI* uri, const char** regionp);
/* S3 profiles */
EXTERNL int NC_authgets3profile(const char* profile, struct AWSprofile** profilep);
EXTERNL int NC_s3profilelookup(const char* profile, const char* key, const char** valuep);
EXTERNL int NC_s3urlprocess(NCURI* url, NCS3INFO* s3);
EXTERNL int NC_s3clear(NCS3INFO* s3);
EXTERNL int NC_iss3(NCURI* uri);

#if defined(__cplusplus)
}
Expand Down
31 changes: 31 additions & 0 deletions include/ncs3sdk.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/*
* Copyright 2018, University Corporation for Atmospheric Research
* See netcdf/COPYRIGHT file for copying and redistribution conditions.
*/

#ifndef NCS3SDK_H
#define NCS3SDK_H 1

#ifdef __cplusplus
extern "C" {
#endif

EXTERNL void NC_s3sdkinitialize(void);
EXTERNL void NC_s3sdkfinalize(void);
EXTERNL void* NC_s3sdkcreateclient(NCS3INFO* context);
EXTERNL int NC_s3sdkbucketexists(void* s3client, const char* bucket, int* existsp, char** errmsgp);
EXTERNL int NC_s3sdkbucketcreate(void* s3client, const char* region, const char* bucket, char** errmsgp);
EXTERNL int NC_s3sdkbucketdelete(void* s3client, const char* region, const char* bucket, char** errmsgp);
EXTERNL int NC_s3sdkinfo(void* client0, const char* bucket, const char* pathkey, unsigned long long* lenp, char** errmsgp);
EXTERNL int NC_s3sdkread(void* client0, const char* bucket, const char* pathkey, unsigned long long start, unsigned long long count, void* content, char** errmsgp);
EXTERNL int NC_s3sdkwriteobject(void* client0, const char* bucket, const char* pathkey, unsigned long long count, const void* content, char** errmsgp);
EXTERNL int NC_s3sdkclose(void* s3client0, NCS3INFO* info, int deleteit, char** errmsgp);
EXTERNL int NC_s3sdkgetkeys(void* s3client0, const char* bucket, const char* prefix, size_t* nkeysp, char*** keysp, char** errmsgp);
EXTERNL int NC_s3sdksearch(void* s3client0, const char* bucket, const char* prefixkey0, size_t* nkeysp, char*** keysp, char** errmsgp);
EXTERNL int NC_s3sdkdeletekey(void* client0, const char* bucket, const char* pathkey, char** errmsgp);

#ifdef __cplusplus
}
#endif

#endif /*NCS3SDK_H*/
4 changes: 2 additions & 2 deletions include/ncuri.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
#define NCURIENCODEQUERY 32 /* If output url query should be encoded */
#define NCURIENCODE (NCURIENCODEPATH|NCURIENCODEQUERY)
#define NCURIBASE (NCURIPWD|NCURIPATH)
#define NCURISVC (NCURIQUERY|NCURIBASE) /* for sending to server */
#define NCURIALL (NCURIPATH|NCURIPWD|NCURIQUERY|NCURIFRAG) /* for rebuilding after changes */
#define NCURISVC (NCURIBASE|NCURIQUERY) /* for sending to server */
#define NCURIALL (NCURIBASE|NCURIQUERY|NCURIFRAG) /* for rebuilding after changes */

/*! This is an open structure meaning
it is ok to directly access its fields
Expand Down
Loading

0 comments on commit f6e25b6

Please sign in to comment.