Skip to content

Commit

Permalink
Merge pull request #1153 from sony/feature/20221205-fix-2G-bigfile-problem
Browse files Browse the repository at this point in the history

Support files larger than 2 GB by refining the use of HDF5
  • Loading branch information
YukioOobuchi authored Jan 10, 2023
2 parents 49bf13c + 553898e commit d97fb23
Show file tree
Hide file tree
Showing 13 changed files with 159 additions and 34 deletions.
23 changes: 17 additions & 6 deletions build-tools/cmake/Utils.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -27,28 +27,39 @@ endfunction()
# Find HDF5 source package
#
# prepend(<lib_paths> <prefix> <name>...)
#
# Builds a list of library file paths, one per <name>, and stores it in the
# variable named by <lib_paths> in the caller's scope. Each entry has the form
#   <prefix>/<libfix><name>[_debug]<postfix>
# where <libfix>/<postfix> are ""/".lib" on WIN32 and "lib"/".so" elsewhere.
# The "_debug" suffix is added when CMAKE_BUILD_TYPE matches "Debug".
function(prepend lib_paths prefix)
  if (WIN32)
    set(postfix ".lib")
    set(libfix "")
  else()
    set(postfix ".so")
    set(libfix "lib")
  endif()
  # The build type cannot change while iterating, so choose the suffix once.
  if (CMAKE_BUILD_TYPE MATCHES Debug)
    set(debugfix "_debug")
  else()
    set(debugfix "")
  endif()
  set(listvar "")
  foreach(f ${ARGN})
    # Exactly one entry per library, using the platform-specific prefix and
    # extension (no hard-coded "lib...so" path, which is wrong on Windows).
    list(APPEND listvar "${prefix}/${libfix}${f}${debugfix}${postfix}")
  endforeach()
  set(${lib_paths} "${listvar}" PARENT_SCOPE)
endfunction()

# findhdf5()
#
# Publishes the locations of the HDF5 copy under third_party/hdf5-master to
# the caller's scope:
#   HDF5_INCLUDE_DIRS - header directories in the source tree (src, hl/src)
#                       and in the build tree (hdf5-master, hdf5-master/src).
#   HDF5_LIBRARIES    - paths of the "hdf5" and "hdf5_hl" libraries under
#                       <build>/third_party/hdf5-master/bin, as produced by
#                       prepend().
function(findhdf5)
  # On WIN32 the libraries are looked up in a per-build-type subdirectory of
  # bin/; elsewhere they are looked up directly in bin/.
  # NOTE(review): presumably this mirrors the per-configuration output
  # folders of the Visual Studio generators - confirm.
  if (WIN32)
    set(build_type "${CMAKE_BUILD_TYPE}")
  else()
    set(build_type "")
  endif()
  set(HDF5_INCLUDE_DIRS
      ${PROJECT_SOURCE_DIR}/third_party/hdf5-master/src
      ${PROJECT_SOURCE_DIR}/third_party/hdf5-master/hl/src
      ${CMAKE_BINARY_DIR}/third_party/hdf5-master
      ${CMAKE_BINARY_DIR}/third_party/hdf5-master/src)
  # The library names are listed explicitly instead of relying on
  # HDF5_LIBRARIES_TO_EXPORT being visible in this scope.
  prepend(HDF5_LIBRARIES ${CMAKE_BINARY_DIR}/third_party/hdf5-master/bin/${build_type} "hdf5" "hdf5_hl")
  set(HDF5_INCLUDE_DIRS "${HDF5_INCLUDE_DIRS}" PARENT_SCOPE)
  set(HDF5_LIBRARIES "${HDF5_LIBRARIES}" PARENT_SCOPE)
endfunction()



################################################################################################
# Clears variables from list
# Usage:
Expand Down
16 changes: 15 additions & 1 deletion build-tools/make/build.mk
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,20 @@ nnabla-doc:
$(NNABLA_DIRECTORY)
make -C $(DOC_DIRECTORY) -j$(PARALLEL_BUILD_NUM) all wheel doc


########################################################################################################################
# Third_party preparation for subsequent step of CI pipeline
# Copy the HDF5 libraries built under third_party/hdf5-master/bin (everything
# matching libhdf5*.* except the *_tools and *_test libraries) into
# $(BUILD_DIRECTORY_CPPLIB)/lib. Does nothing when the bin directory is absent.
.PHONY: nnabla-cpplib-collect
nnabla-cpplib-collect:
	@cd $(BUILD_DIRECTORY_CPPLIB) && \
	if [ -d third_party/hdf5-master/bin ]; then \
		mkdir -p lib && \
		find third_party/hdf5-master/bin -name "libhdf5*.*" \
			-not -name "libhdf5*_tools.*" \
			-not -name "libhdf5*_test.*" \
			-exec sh -c 'cp -R "$$@" lib/' sh {} + ; \
	fi
# Why "-exec ... {} +" instead of "| xargs cp -Rt lib":
#  * cp is not invoked at all when nothing matches, so an empty match no
#    longer fails the target with "missing file operand";
#  * it does not depend on the GNU-only "cp -t" option;
#  * a failing cp still makes find (and therefore the recipe) fail.


########################################################################################################################
# Build and test.
.PHONY: nnabla-cpplib
Expand Down Expand Up @@ -191,7 +205,7 @@ nnabla-shell:
########################################################################################################################
# test
.PHONY: nnabla-test-cpplib
nnabla-test-cpplib: nnabla-cpplib
nnabla-test-cpplib: nnabla-cpplib nnabla-cpplib-collect
@$(MAKE) -C $(BUILD_DIRECTORY_CPPLIB) cpplibtest
@$(MAKE) -C $(BUILD_DIRECTORY_CPPLIB) test_nbla_utils
@bash -c "(cd $(BUILD_DIRECTORY_CPPLIB) && ctest -R cpplibtest --output-on-failure)"
Expand Down
6 changes: 5 additions & 1 deletion build-tools/make/options.mk
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,11 @@ DOCKER_RUN_OPTS += -e NNABLA_VERSION=$(NNABLA_VERSION)
NNABLA_UTILS_STATIC_LINK_DEPS ?= OFF
DOCKER_RUN_OPTS += -e NNABLA_UTILS_STATIC_LINK_DEPS=$(NNABLA_UTILS_STATIC_LINK_DEPS)

# HDF5 support in nbla_utils defaults to ON. Only one "?=" default may
# precede this point: an earlier "?= OFF" would already define the variable
# and silently turn the "?= ON" below into a no-op.
NNABLA_UTILS_WITH_HDF5 ?= ON
# On macOS (uname -s == Darwin) HDF5 is switched off with a plain assignment,
# which overrides the default above and any value taken from the environment.
OS := $(shell uname -s)
ifeq ($(OS), Darwin)
NNABLA_UTILS_WITH_HDF5 = OFF
endif
# Forward the final value into the docker build environment.
DOCKER_RUN_OPTS += -e NNABLA_UTILS_WITH_HDF5=$(NNABLA_UTILS_WITH_HDF5)

MAKE_MANYLINUX_WHEEL ?= OFF
Expand Down
3 changes: 2 additions & 1 deletion build-tools/make/test_nbla_utils.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
# limitations under the License.
BINARY_PATH=$(find . -name "test_nbla_utils")
LD_PATH=$(find . -name "libnnabla_util*.*")
LD_PATH+=" "
LD_PATH+=$(find .. -name "libhdf5*.*")

test_nbla_utils=''

Expand All @@ -24,7 +26,6 @@ done

for ld_p in ${LD_PATH}; do
export LD_LIBRARY_PATH=$(dirname $ld_p):$LD_LIBRARY_PATH
break
done

echo "PATH: $PATH"
Expand Down
3 changes: 3 additions & 0 deletions build-tools/msvc/build_cpplib.bat
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ REM Build third party libraries.
CALL %~dp0tools\build_zlib.bat || GOTO :error
CALL %~dp0tools\build_libarchive.bat || GOTO :error
CALL %~dp0tools\build_protobuf.bat || GOTO :error
CALL %~dp0tools\build_hdf5.bat || GOTO :error


REM Get pre-built lz4 and zstd libraries
CALL %~dp0tools\get_liblz4.bat || GOTO :error
Expand Down Expand Up @@ -77,6 +79,7 @@ cmake -G "%generate_target%" ^
-DProtobuf_PROTOC_EXECUTABLE=%protobuf_protoc_executable% ^
-DZLIB_INCLUDE_DIR=%zlib_include_dir% ^
-DZLIB_LIBRARY_RELEASE=%zlib_library% ^
-DNNABLA_UTILS_WITH_HDF5=ON ^
%nnabla_debug_options% ^
%nnabla_root% || GOTO :error

Expand Down
66 changes: 66 additions & 0 deletions build-tools/msvc/tools/build_hdf5.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
@ECHO OFF

REM Copyright 2022 Sony Group Corporation.
REM
REM Licensed under the Apache License, Version 2.0 (the "License");
REM you may not use this file except in compliance with the License.
REM You may obtain a copy of the License at
REM
REM http://www.apache.org/licenses/LICENSE-2.0
REM
REM Unless required by applicable law or agreed to in writing, software
REM distributed under the License is distributed on an "AS IS" BASIS,
REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
REM See the License for the specific language governing permissions and
REM limitations under the License.

REM Build hdf5 for windows
REM
REM Downloads the HDF5 1.12.2 source archive into %third_party_folder% (unless
REM an hdf5-master folder already exists there), renames the extracted folder
REM to hdf5-master, then configures and builds the shared libraries in
REM hdf5-master\build-folder.
REM
REM Reads these variables, none of which is set in this script:
REM   third_party_folder, generate_target, build_type, nnabla_iwr_options
REM Exits with a non-zero code as soon as any step fails.

SET hdf5_folder=%third_party_folder%\hdf5-master

CD %third_party_folder% || GOTO :error

REM The subroutine returns its error code through "exit /b"; without the
REM "|| GOTO :error" a failed download would be ignored and the build would
REM continue in a missing or half-extracted folder.
CALL :download_hdf5_and_rename https://github.com/HDFGroup/hdf5/archive/refs/tags/hdf5-1_12_2.zip || GOTO :error

CD %hdf5_folder% || GOTO :error
IF NOT EXIST build-folder (
  MD build-folder || GOTO :error
)
CD %hdf5_folder%\build-folder || GOTO :error

REM Shared libraries only, including the high-level (HL) library; tools,
REM utilities, examples and the optional test suites are switched off.
cmake.exe -G "%generate_target%" .. ^
      -DBUILD_STATIC_LIBS=OFF ^
      -DONLY_SHARED_LIBS=ON ^
      -DBUILD_SHARED_LIBS=ON ^
      -DCPACK_SOURCE_ZIP=OFF ^
      -DHDF5_BUILD_HL_TOOLS=OFF ^
      -DHDF5_BUILD_TOOLS=OFF ^
      -DHDF5_BUILD_UTILS=OFF ^
      -DHDF5_BUILD_EXAMPLES=OFF ^
      -DHDF5_TEST_CPP=OFF ^
      -DHDF5_TEST_EXAMPLES=OFF ^
      -DHDF5_TEST_JAVA=OFF ^
      -DHDF5_TEST_TOOLS=OFF ^
      -DHDF5_TEST_VFD=OFF ^
      -DHDF5_TEST_SWMR=OFF ^
      -DHDF5_TEST_PARALLEL=OFF ^
      -DHDF5_BUILD_HL_LIB=ON || GOTO :error
cmake.exe --build . --config %build_type% || GOTO :error

exit /b 0


REM Subroutine: %1 is the URL of the HDF5 source zip.
REM Skipped entirely when an hdf5-master folder already exists in the
REM current directory.
:download_hdf5_and_rename
IF NOT EXIST hdf5-master (
  ECHO downloading %1
  powershell "[Net.ServicePointManager]::SecurityProtocol +='tls12'; iwr %nnabla_iwr_options% -Uri %1 -OutFile hdf5-master.zip" || GOTO :error
  cmake -E tar xvzf hdf5-master.zip || GOTO :error
  MOVE hdf5-hdf5-1_12_2 hdf5-master || GOTO :error
)
REM Explicit 0 so a stale errorlevel from an earlier command cannot make the
REM caller's "|| GOTO :error" fire on the success path.
exit /b 0


:error
ECHO failed with error code %errorlevel%.
exit /b %errorlevel%

6 changes: 3 additions & 3 deletions python/src/nnabla/utils/get_file_handle.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,15 +408,15 @@ def _nnp_file_saver(ctx, filename, ext):

param = io.BytesIO()
if ctx.parameters is None:
nn.parameter.save_parameters(param, extension='.protobuf')
nn.parameter.save_parameters(param, extension='.h5')
else:
nn.parameter.save_parameters(
param, ctx.parameters, extension='.protobuf')
param, ctx.parameters, extension='.h5')

with get_file_handle_save(filename, ext) as nnp:
nnp.writestr('nnp_version.txt', version.read())
nnp.writestr('network.nntxt', nntxt.read())
nnp.writestr('parameter.protobuf', param.read())
nnp.writestr('parameter.h5', param.read())


def _h5_parameter_file_saver(ctx, filename, ext):
Expand Down
6 changes: 2 additions & 4 deletions src/nbla_cli/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,10 @@ if (NNABLA_UTILS_WITH_HDF5)
if (NNABLA_UTILS_STATIC_LINK_DEPS)
set(HDF5_USE_STATIC_LIBRARIES 1)
endif()
findhdf5()
findhdf5()
endif()
if (NNABLA_UTILS_STATIC_LINK_DEPS)
if (NNABLA_UTILS_WITH_HDF5)
set(HDF5_LIBRARIES ${HDF5_LIBRARIES} ${CMAKE_DL_LIBS})
endif()
set(HDF5_LIBRARIES ${HDF5_LIBRARIES})
endif()

if (NNABLA_UTILS_WITH_HDF5)
Expand Down
32 changes: 24 additions & 8 deletions src/nbla_utils/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,6 @@ add_library(${LIB_NAME} SHARED
)

if (WIN32)
if (NNABLA_UTILS_WITH_HDF5)
message(FATAL_ERROR, "HDF5 cannot be enabled on Windows.")
error() # Undefined command to abort cmake process here.
endif()
find_package(LibArchive REQUIRED)
find_package(ZLIB REQUIRED)
find_package(Protobuf REQUIRED)
Expand All @@ -93,6 +89,21 @@ if (WIN32)
${LibArchive_LIBRARIES}
${ZLIB_LIBRARIES}
)

if (NNABLA_UTILS_WITH_HDF5)
if (NNABLA_UTILS_STATIC_LINK_DEPS)
set(HDF5_USE_STATIC_LIBRARIES 1)
endif()
findhdf5()
include_directories(
${HDF5_INCLUDE_DIRS}
)
target_link_libraries(${LIB_NAME}
${HDF5_LIBRARIES}
)
add_definitions(-DH5_BUILT_AS_DYNAMIC_LIB)
endif()

else()
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
if (NNABLA_UTILS_WITH_HDF5)
Expand Down Expand Up @@ -183,10 +194,16 @@ if (BUILD_TEST)
include_directories(${PROJECT_SOURCE_DIR}/src/nbla_cli/)
if (NNABLA_UTILS_WITH_HDF5)
list(APPEND NBLA_UTILS_TEST_SOURCES
test/test_expander.cpp
test/test_hdf5_load.cpp
${PROJECT_SOURCE_DIR}/src/nbla_cli/internal.cpp
test/test_load_save.cpp
)
if (NOT WIN32)
# These cases need to prepare test case files, disabled for windows.
list(APPEND NBLA_UTILS_TEST_SOURCES
test/test_expander.cpp
test/test_hdf5_load.cpp
${PROJECT_SOURCE_DIR}/src/nbla_cli/internal.cpp
)
endif()
endif()
if (NNABLA_UTILS_WITH_NPY)
if (NOT WIN32)
Expand All @@ -201,7 +218,6 @@ if (BUILD_TEST)
endif()
list(APPEND NBLA_UTILS_TEST_SOURCES
test/test_load_save_parameters.cpp
test/test_load_save.cpp
)
add_executable(test_nbla_utils ${NBLA_UTILS_TEST_SOURCES})
add_dependencies(test_nbla_utils ${NBLA_LIBRARY_NAME})
Expand Down
2 changes: 2 additions & 0 deletions src/nbla_utils/nnp_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
// limitations under the License.

#ifdef _WIN32
#ifndef NBLA_UTILS_WITH_HDF5
typedef int ssize_t;
#endif
#else
#include <fcntl.h>
#include <unistd.h>
Expand Down
2 changes: 2 additions & 0 deletions src/nbla_utils/nnp_network_expander.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
// limitations under the License.

#ifdef _WIN32
#ifndef NBLA_UTILS_WITH_HDF5
typedef int ssize_t;
#endif
#else
#include <fcntl.h>
#include <unistd.h>
Expand Down
22 changes: 13 additions & 9 deletions src/nbla_utils/parameters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@
#include <string>
#include <vector>

// HDF5
#ifdef NBLA_UTILS_WITH_HDF5
#include <hdf5.h>
#include <hdf5_hl.h>
#endif

#include <nbla/computation_graph/variable.hpp>
#include <nbla/initializer.hpp>
#include <nbla/logger.hpp>
Expand All @@ -41,12 +47,6 @@
#include "nnabla.pb.h"
#include "parameters_impl.hpp"

// HDF5
#ifdef NBLA_UTILS_WITH_HDF5
#include <hdf5.h>
#include <hdf5_hl.h>
#endif

using namespace std;

namespace nbla {
Expand All @@ -70,8 +70,8 @@ string get_extension(const string &filename) {
bool parse_hdf5_dataset(string name, hid_t did, ParameterVector &pv) {
hid_t sp = H5Dget_space(did);
int rank = H5Sget_simple_extent_ndims(sp);
hsize_t dims[rank];
herr_t err = H5Sget_simple_extent_dims(sp, dims, nullptr);
nbla::vector<hsize_t> dims(rank);
herr_t err = H5Sget_simple_extent_dims(sp, dims.data(), nullptr);

hsize_t size = H5Dget_storage_size(did);
string variable_name = name.substr(1, name.length());
Expand All @@ -85,7 +85,7 @@ bool parse_hdf5_dataset(string name, hid_t did, ParameterVector &pv) {
err = H5Dread(did, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT,
buffer.get());
if (err >= 0) {
Shape_t shape(dims, dims + rank);
Shape_t shape((int64_t *)(dims.data()), (int64_t *)(dims.data() + rank));
// fix crash bug by replacing bool with int,
// since actual 4 bytes is read.
int need_grad = false; // default need_grad
Expand Down Expand Up @@ -499,7 +499,11 @@ bool save_parameters_h5(const ParameterVector &pv, char *buffer,
#ifdef NBLA_UTILS_WITH_HDF5
hid_t file_id;
nbla::string filename;
#ifndef WIN32
const char *tmp = getenv("TMPDIR");
#else
const char *tmp = getenv("TEMP");
#endif

H5Eset_auto1(NULL, NULL);
if (tmp == 0)
Expand Down
6 changes: 5 additions & 1 deletion third_party/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,15 @@ function(download_and_extract_library NAME EXT URL TYPE)
endfunction()

if(NNABLA_UTILS_WITH_HDF5)
  # Fetch the pinned HDF5 1.12.2 release from the HDFGroup repository. Only
  # this one archive is requested for the "hdf5-master" name; the older
  # mokus0 master.zip download must not be issued alongside it.
  download_and_extract_library(hdf5-master .zip https://github.com/HDFGroup/hdf5/archive/refs/tags/hdf5-1_12_2.zip DIRECTORY)
  # NOTE(review): the trailing comma makes this set a variable literally named
  # "HDF5_EXTERNALLY_CONFIGURED," so HDF5 never sees the flag. Left untouched
  # because enabling it may change where HDF5 places its libraries, and
  # findhdf5() expects them under hdf5-master/bin - confirm before fixing.
  set(HDF5_EXTERNALLY_CONFIGURED, 1)
  set(HDF5_EXPORTED_TARGETS "nbla_utils-hdf5-targets")
  # Forced cache entries: build HDF5 as shared libraries only, including the
  # high-level library.
  set(BUILD_SHARED_LIBS ON CACHE BOOL "Build Shared Libraries" FORCE)
  set(ONLY_SHARED_LIBS ON CACHE BOOL "Only build shared libraries" FORCE)
  set(HDF5_BUILD_HL_LIB ON CACHE BOOL "Build HIGH Level HDF5 Library" FORCE)
  # The release archive presumably unpacks to "hdf5-hdf5-1_12_2"; rename it
  # once so the rest of the build can keep using the "hdf5-master" path.
  if(NOT IS_DIRECTORY ${CMAKE_SOURCE_DIR}/third_party/hdf5-master)
    file(RENAME hdf5-hdf5-1_12_2 hdf5-master)
  endif()
  add_subdirectory(hdf5-master)
  # NOTE(review): the export set is installed to a directory literally named
  # "trash"; presumably it exists only so the export set is consumed - confirm.
  install(EXPORT nbla_utils-hdf5-targets DESTINATION "trash")
endif()
Expand Down

0 comments on commit d97fb23

Please sign in to comment.