This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

bump up 1.x branch to 1.7 #17740

Closed · wants to merge 16 commits
2 changes: 1 addition & 1 deletion .github/workflows/os_x_staticbuild.yml
@@ -10,7 +10,7 @@ jobs:
uses: actions/checkout@v2
- name: Install Dependencies
run: |
brew install nasm automake ninja libtool
brew install nasm automake ninja libtool cmake pkgconfig protobuf
- name: Build project
run: |
git --version
9 changes: 8 additions & 1 deletion 3rdparty/mshadow/CMakeLists.txt
@@ -13,6 +13,12 @@ add_library(mshadow INTERFACE)
file(GLOB_RECURSE MSHADOWSOURCE "mshadow/*.h")
target_include_directories(mshadow INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}")
target_sources(mshadow INTERFACE ${MSHADOWSOURCE})
if(UNIX)
target_compile_options(mshadow INTERFACE
"$<$<COMPILE_LANGUAGE:CXX>:-Wno-unused-parameter>"
"$<$<COMPILE_LANGUAGE:CXX>:-Wno-unknown-pragmas>"
"$<$<COMPILE_LANGUAGE:CXX>:-Wno-unused-local-typedefs>")
endif()

if(USE_CUDA)
enable_language(CUDA)
@@ -67,7 +73,8 @@ else()
endif()

set(mshadow_LINT_DIRS mshadow mshadow-ps)
find_package(Python3)
add_custom_target(mshadow_lint COMMAND ${CMAKE_COMMAND} -DMSVC=${MSVC}
-DPYTHON_EXECUTABLE=${PYTHON_EXECUTABLE} -DLINT_DIRS=${mshadow_LINT_DIRS}
-DPYTHON_EXECUTABLE=${Python3_EXECUTABLE} -DLINT_DIRS=${mshadow_LINT_DIRS}
-DPROJECT_SOURCE_DIR=${PROJECT_SOURCE_DIR} -DPROJECT_NAME=mshadow
-P ${PROJECT_SOURCE_DIR}/../dmlc-core/cmake/lint.cmake)
5 changes: 4 additions & 1 deletion 3rdparty/mshadow/cmake/AutoDetectF16C.cmake
@@ -25,8 +25,11 @@ if(AUTO_DETECT_F16_CMAKE_INCLUDED)
return()
endif()
set(AUTO_DETECT_F16_CMAKE_INCLUDED True)

set(SUPPORT_F16C False)
if(ANDROID)
message("F16C instruction set is not yet supported for Andriod")
return()
endif()
if(MSVC)
message("F16C instruction set is not yet supported for MSVC")
return()
9 changes: 5 additions & 4 deletions 3rdparty/mshadow/mshadow/base.h
@@ -18,12 +18,13 @@
#define NOMINMAX
#endif
#endif
#include <cmath>
#include <cstdio>
#include <algorithm>
#include <cfloat>
#include <climits>
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <functional>
#include <limits>
#include <sstream>
#include <string>

@@ -839,7 +840,7 @@ MSHADOW_XINLINE bool MaxValue<bool>(void) {
/*! \brief maximum value of uint32_t */
template<>
MSHADOW_XINLINE uint32_t MaxValue<uint32_t>(void) {
return -1;
return std::numeric_limits<uint32_t>::max();
}

/*!
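Both forms of `MaxValue<uint32_t>` return the same value: converting \(-1\) to a 32-bit unsigned type wraps modulo \(2^{32}\),

$$-1 \equiv 2^{32} - 1 = 4294967295 \pmod{2^{32}},$$

which is exactly `std::numeric_limits<uint32_t>::max()`. The replacement states the intent directly instead of relying on the implicit signed-to-unsigned conversion.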
40 changes: 25 additions & 15 deletions CMakeLists.txt
@@ -51,6 +51,7 @@ else()
endif()
option(USE_GPERFTOOLS "Build with GPerfTools support" OFF)
option(USE_JEMALLOC "Build with Jemalloc support" OFF)
option(USE_LIBJPEG_TURBO "Use libjpeg-turbo" OFF)
option(USE_DIST_KVSTORE "Build with DIST_KVSTORE support" OFF)
option(USE_PLUGINS_WARPCTC "Use WARPCTC Plugins" OFF)
option(USE_PLUGIN_CAFFE "Use Caffe Plugin" OFF)
@@ -381,6 +382,16 @@ if(USE_JEMALLOC)
endif()
endif()

if(USE_LIBJPEG_TURBO)
find_package(PkgConfig REQUIRED)
pkg_search_module(TURBOJPEG REQUIRED libturbojpeg)
include_directories(SYSTEM ${TURBOJPEG_INCLUDE_DIRS})
list(APPEND mxnet_LINKER_LIBS ${TURBOJPEG_LINK_LIBRARIES})
add_definitions(-DMXNET_USE_LIBJPEG_TURBO=1)
else()
add_definitions(-DMXNET_USE_LIBJPEG_TURBO=0)
endif()

# ---[ OpenCV
if(USE_OPENCV)
find_package(OpenCV COMPONENTS core highgui imgproc imgcodecs)
@@ -661,7 +672,7 @@ add_subdirectory("3rdparty/mshadow")

set(MXNET_INSTALL_TARGETS mxnet)
if(UNIX)
string(APPEND CMAKE_CUDA_FLAGS "${CUDA_ARCH_FLAGS_SPACES}")
string(APPEND CMAKE_CUDA_FLAGS " ${CUDA_ARCH_FLAGS_SPACES}")
# Create dummy file since we want an empty shared library before linking
set(DUMMY_SOURCE ${CMAKE_BINARY_DIR}/dummy.c)
file(WRITE ${DUMMY_SOURCE} "")
@@ -673,6 +684,15 @@ if(UNIX)
target_link_libraries(mxnet PRIVATE mxnet_static)
target_link_libraries(mxnet_static PUBLIC ${CMAKE_DL_LIBS})
set_target_properties(mxnet_static PROPERTIES OUTPUT_NAME mxnet)
if(CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo")
target_compile_options(mxnet_static PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:-Werror>")
# Ignore erroneous compiler warnings:
# 1) variables used in '#pragma omp parallel' are considered unused
target_compile_options(mxnet_static PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:-Wno-error=unused-variable>")
if(USE_CUDA)
string(APPEND CMAKE_CUDA_FLAGS " -Werror cross-execution-space-call")
endif()
endif()
elseif(MSVC)
if(USE_CUDA)
if(MSVC)
@@ -708,7 +728,7 @@ elseif(MSVC)
COMMAND gen_warp $<TARGET_FILE:mxnet_${mxnet_first_arch}> WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/ DEPENDS $<TARGET_FILE:mxnet_${mxnet_first_arch}>)
else(USE_SPLIT_ARCH_DLL)
string(REPLACE ";" " " NVCC_FLAGS_ARCH "${NVCC_FLAGS_ARCH}")
set(CMAKE_CUDA_FLAGS "${CUDA_ARCH_FLAGS_SPACES}")
set(CMAKE_CUDA_FLAGS " ${CUDA_ARCH_FLAGS_SPACES}")
add_library(mxnet SHARED ${SOURCE})
target_link_libraries(mxnet PUBLIC mshadow)
target_compile_options(
@@ -778,14 +798,7 @@ endfunction()
if(USE_TVM_OP)
list(APPEND mxnet_LINKER_LIBS ${CMAKE_CURRENT_BINARY_DIR}/3rdparty/tvm/libtvm_runtime.so)
BuildTVMOP()
if(NOT Python3_EXECUTABLE)
find_package(PythonInterp 3 REQUIRED)
set(Python3_EXECUTABLE ${PYTHON_EXECUTABLE} CACHE FILEPATH "Path to the python3 executable")
if(NOT Python3_EXECUTABLE)
message(FATAL_ERROR "No python3 interpreter found to build TVM operators")
endif()
endif()

find_package(Python3 REQUIRED)
set(TVM_OP_COMPILE_OPTIONS "-o${CMAKE_CURRENT_BINARY_DIR}" "--config" "${CMAKE_CURRENT_BINARY_DIR}/tvmop.conf" "-L" "${CMAKE_CURRENT_BINARY_DIR}/3rdparty/tvm")
if(USE_CUDA)
set(TVM_OP_COMPILE_OPTIONS "${TVM_OP_COMPILE_OPTIONS}" "--cuda-arch" "\"${CUDA_ARCH_FLAGS}\"")
@@ -904,13 +917,10 @@ endif()
add_subdirectory(tests)

# ---[ Linter target
if(MSVC)
find_package(PythonInterp)
set(PYTHON_EXECUTABLE ${PYTHON_EXECUTABLE} CACHE FILEPATH "Path to the python executable")
endif()
find_package(Python3)
set(LINT_DIRS "include src plugin cpp-package tests")
set(EXCLUDE_PATH "src/operator/contrib/ctc_include")
add_custom_target(mxnet_lint COMMAND ${CMAKE_COMMAND} -DMSVC=${MSVC} -DPYTHON_EXECUTABLE=${PYTHON_EXECUTABLE} -DLINT_DIRS=${LINT_DIRS} -DPROJECT_SOURCE_DIR=${CMAKE_CURRENT_SOURCE_DIR} -DPROJECT_NAME=mxnet -DEXCLUDE_PATH=${EXCLUDE_PATH} -P ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/dmlc-core/cmake/lint.cmake)
add_custom_target(mxnet_lint COMMAND ${CMAKE_COMMAND} -DMSVC=${MSVC} -DPYTHON_EXECUTABLE=${Python3_EXECUTABLE} -DLINT_DIRS=${LINT_DIRS} -DPROJECT_SOURCE_DIR=${CMAKE_CURRENT_SOURCE_DIR} -DPROJECT_NAME=mxnet -DEXCLUDE_PATH=${EXCLUDE_PATH} -P ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/dmlc-core/cmake/lint.cmake)

if(BUILD_CYTHON_MODULES)
include(cmake/BuildCythonModules.cmake)
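One subtlety in this file's changes: `string(APPEND CMAKE_CUDA_FLAGS ...)` concatenates with no separator, which is why the patch adds a leading space before `${CUDA_ARCH_FLAGS_SPACES}`. A minimal Python sketch of the failure mode (the flag values are illustrative stand-ins, not taken from the build):

```python
# string(APPEND ...) semantics, sketched: plain concatenation.
existing_flags = "-O2"                              # whatever CMAKE_CUDA_FLAGS already holds
arch_flags = "-gencode arch=compute_70,code=sm_70"  # stand-in for CUDA_ARCH_FLAGS_SPACES

print(existing_flags + arch_flags)        # "-O2-gencode ..."  -> one mangled flag
print(existing_flags + " " + arch_flags)  # "-O2 -gencode ..." -> two valid flags
```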
4 changes: 4 additions & 0 deletions Makefile
@@ -223,6 +223,8 @@ ifeq (,$(wildcard /lib/liblapack.a))
ifeq (,$(wildcard /lib/liblapack.so))
ifeq (,$(wildcard /usr/lib/liblapack.a))
ifeq (,$(wildcard /usr/lib/liblapack.so))
ifeq (,$(wildcard /usr/lib/x86_64-linux-gnu/liblapack.a))
ifeq (,$(wildcard /usr/lib/x86_64-linux-gnu/liblapack.so))
ifeq (,$(wildcard /usr/lib/liblapack.dylib))
ifeq (,$(wildcard /usr/lib64/liblapack.a))
ifeq (,$(wildcard /usr/lib64/liblapack.so))
@@ -240,6 +242,8 @@ endif
endif
endif
endif
endif
endif

# lapack settings.
ifeq ($(USE_LAPACK), 1)
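The Makefile change above extends the LAPACK probe with the Debian/Ubuntu multiarch directory. The nested `ifeq (,$(wildcard ...))` chain is easier to read as a flat any-of check; a rough Python equivalent (paths taken from the diff, behavior approximated — the real Makefile goes on to adjust USE_LAPACK and the link flags):

```python
from pathlib import Path

# Locations the Makefile probes for liblapack; the diff adds the two
# /usr/lib/x86_64-linux-gnu multiarch paths.
LAPACK_CANDIDATES = [
    "/lib/liblapack.a", "/lib/liblapack.so",
    "/usr/lib/liblapack.a", "/usr/lib/liblapack.so",
    "/usr/lib/x86_64-linux-gnu/liblapack.a",
    "/usr/lib/x86_64-linux-gnu/liblapack.so",
    "/usr/lib/liblapack.dylib",
    "/usr/lib64/liblapack.a", "/usr/lib64/liblapack.so",
]

lapack_found = any(Path(p).exists() for p in LAPACK_CANDIDATES)
print("LAPACK available" if lapack_found else "LAPACK not found")
```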
2 changes: 1 addition & 1 deletion R-package/DESCRIPTION
@@ -1,7 +1,7 @@
Package: mxnet
Type: Package
Title: MXNet: A Flexible and Efficient Machine Learning Library for Heterogeneous Distributed Systems
Version: 1.6.0
Version: 1.7.0
Date: 2017-06-27
Author: Tianqi Chen, Qiang Kou, Tong He, Anirudh Acharya <https://github.com/anirudhacharya>
Maintainer: Qiang Kou <qkou@qkou.info>
2 changes: 1 addition & 1 deletion R-package/Makefile
@@ -1,5 +1,5 @@
rcpplint:
3rdparty/dmlc-core/scripts/lint.py mxnet-rcpp ${LINT_LANG} R-package/src
./3rdparty/dmlc-core/scripts/lint.py mxnet-rcpp all R-package/src

rpkg:
mkdir -p R-package/inst/libs
14 changes: 7 additions & 7 deletions R-package/R/optimizer.R
@@ -109,9 +109,9 @@ mx.opt.sgd <- function(learning.rate = 0.01,
#'
#' @param learning.rate float, default=0.002
#' The initial learning rate.
#' @param gamma1 float, default=0.95
#' @param rho float, default=0.95
#' decay factor of moving average for gradient, gradient^2.
#' @param gamma2 float, default=0.9
#' @param momentum float, default=0.9
#' "momentum" factor.
#' @param epsilon float, default=1e-4
#' @param wd float, default=0.0
@@ -125,8 +125,8 @@ mx.opt.sgd <- function(learning.rate = 0.01,
#'
mx.opt.rmsprop <- function(learning.rate = 0.002,
centered = TRUE,
gamma1 = 0.95,
gamma2 = 0.9,
rho = 0.95,
momentum = 0.9,
epsilon = 1e-4,
wd = 0,
rescale.grad = 1,
@@ -158,8 +158,8 @@ mx.opt.rmsprop <- function(learning.rate = 0.002,
g,
delta,
lr = lr,
gamma1 = gamma1,
gamma2 = gamma2,
rho = rho,
momentum = momentum,
epsilon = epsilon,
wd = wd,
rescale_grad = rescale.grad,
@@ -174,7 +174,7 @@ mx.opt.rmsprop <- function(learning.rate = 0.002,
grad,
n,
lr = lr,
gamma1 = gamma1,
rho = rho,
epsilon = epsilon,
wd = wd,
rescale_grad = rescale.grad,
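The rename from `gamma1`/`gamma2` to `rho`/`momentum` matches what these parameters do in the usual centered RMSProp formulation (sketched here from the standard algorithm, not from this diff): `rho` decays the running first and second moments of the gradient, and `momentum` scales the accumulated update:

$$
\begin{aligned}
n_t &= \rho\, n_{t-1} + (1-\rho)\, g_t^2, \\
\bar g_t &= \rho\, \bar g_{t-1} + (1-\rho)\, g_t, \\
\Delta_t &= \gamma\, \Delta_{t-1} - \frac{\eta\, g_t}{\sqrt{n_t - \bar g_t^2 + \epsilon}}, \\
w_{t+1} &= w_t + \Delta_t,
\end{aligned}
$$

where \(\gamma\) is the momentum and \(\eta\) the learning rate. These correspond to the `n`, `g`, and `delta` state arrays passed to the updater above.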
4 changes: 2 additions & 2 deletions R-package/tests/testthat/test_optimizer.R
@@ -73,8 +73,8 @@ test_that("rmsprop", {
fc1_weight = w1, label = y), aux.arrays = NULL, grad.reqs = c("null", "write",
"null"))

optimizer <- mx.opt.create("rmsprop", learning.rate = 1, centered = TRUE, gamma1 = 0.95,
gamma2 = 0.9, epsilon = 1e-04, wd = 0, rescale.grad = 1, clip_gradient = -1)
optimizer <- mx.opt.create("rmsprop", learning.rate = 1, centered = TRUE, rho = 0.95,
momentum = 0.9, epsilon = 1e-04, wd = 0, rescale.grad = 1, clip_gradient = -1)

updaters <- mx.opt.get.updater(optimizer, exec$ref.arg.arrays, ctx = mx.ctx.default())

8 changes: 5 additions & 3 deletions benchmark/opperf/nd_operations/array_rearrange.py
@@ -29,8 +29,8 @@
"""


def run_rearrange_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
"""Runs benchmarks with the given context and precision (dtype) for all the
def run_rearrange_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', int64_tensor='off', warmup=25, runs=100):
"""Runs benchmarks with the given context, precision (dtype), and input data size (int64_tensor) for all the
rearrange operators in MXNet.

Parameters
@@ -41,6 +41,8 @@ def run_rearrange_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='
Precision to use for benchmarks
profiler: str, default 'native'
Type of Profiler to use (native/python)
int64_tensor: str, default 'off'
Input tensor size to use for tests (if on, dimensions >= 2**32)
warmup: int, default 25
Number of times to run for warmup
runs: int, default 100
@@ -55,5 +57,5 @@ def run_rearrange_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='
mx_rearrange_ops = get_all_rearrange_operators()

# Run benchmarks
mx_rearrange_op_results = run_op_benchmarks(mx_rearrange_ops, dtype, ctx, profiler, warmup, runs)
mx_rearrange_op_results = run_op_benchmarks(mx_rearrange_ops, dtype, ctx, profiler, int64_tensor, warmup, runs)
return mx_rearrange_op_results
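With the new `int64_tensor` switch, a call exercising the large-tensor path might look like this (a hypothetical invocation; the import path follows the file's location in the tree and assumes an environment where the `benchmark` package is importable):

```python
import mxnet as mx
from benchmark.opperf.nd_operations.array_rearrange import (
    run_rearrange_operators_benchmarks,
)

# Benchmark the rearrange operators on CPU with inputs whose dimensions
# cross the 2**32 threshold; other arguments keep their defaults.
results = run_rearrange_operators_benchmarks(
    ctx=mx.cpu(), dtype="float32", profiler="native",
    int64_tensor="on", warmup=25, runs=100,
)
print(results)
```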
26 changes: 17 additions & 9 deletions benchmark/opperf/nd_operations/binary_operators.py
@@ -38,8 +38,8 @@
get_all_elemen_wise_binary_operators, get_all_misc_binary_operators


def run_mx_binary_misc_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
"""Runs benchmarks with the given context and precision (dtype) for all the miscellaneous
def run_mx_binary_misc_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', int64_tensor='off', warmup=25, runs=100):
"""Runs benchmarks with the given context, precision (dtype), and input data size (int64_tensor) for all the miscellaneous
binary operators in MXNet.

Parameters
@@ -48,6 +48,10 @@ def run_mx_binary_misc_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profi
Context to run benchmarks
dtype: str, default 'float32'
Precision to use for benchmarks
profiler: str, default 'native'
Type of Profiler to use (native/python)
int64_tensor: str, default 'off'
Input tensor size to use for tests (if on, dimensions >= 2**32)
warmup: int, default 25
Number of times to run for warmup
runs: int, default 100
@@ -61,12 +65,12 @@ def run_mx_binary_misc_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profi
# Fetch all Miscellaneous Binary Operators
mx_binary_misc_ops = get_all_misc_binary_operators()
# Run benchmarks
mx_binary_op_results = run_op_benchmarks(mx_binary_misc_ops, dtype, ctx, profiler, warmup, runs)
mx_binary_op_results = run_op_benchmarks(mx_binary_misc_ops, dtype, ctx, profiler, int64_tensor, warmup, runs)
return mx_binary_op_results


def run_mx_binary_broadcast_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
"""Runs benchmarks with the given context and precision (dtype) for all the binary
def run_mx_binary_broadcast_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', int64_tensor='off', warmup=25, runs=100):
"""Runs benchmarks with the given context, precision (dtype), and input data size (int64_tensor) for all the binary
broadcast operators in MXNet.

Parameters
@@ -77,6 +81,8 @@ def run_mx_binary_broadcast_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
Precision to use for benchmarks
profiler: str, default 'native'
Type of Profiler to use (native/python)
int64_tensor: str, default 'off'
Input tensor size to use for tests (if on, dimensions >= 2**32)
warmup: int, default 25
Number of times to run for warmup
runs: int, default 100
@@ -90,12 +96,12 @@ def run_mx_binary_broadcast_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
# Fetch all Binary Broadcast Operators
mx_binary_broadcast_ops = get_all_broadcast_binary_operators()
# Run benchmarks
mx_binary_op_results = run_op_benchmarks(mx_binary_broadcast_ops, dtype, ctx, profiler, warmup, runs)
mx_binary_op_results = run_op_benchmarks(mx_binary_broadcast_ops, dtype, ctx, profiler, int64_tensor, warmup, runs)
return mx_binary_op_results


def run_mx_binary_element_wise_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
"""Runs benchmarks with the given context and precision (dtype) for all the binary
def run_mx_binary_element_wise_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', int64_tensor='off', warmup=25, runs=100):
"""Runs benchmarks with the given context, precision (dtype), and input data size (int64_tensor) for all the binary
element_wise operators in MXNet.

Parameters
@@ -106,6 +112,8 @@ def run_mx_binary_element_wise_operators_benchmarks(ctx=mx.cpu(), dtype='float32
Precision to use for benchmarks
profiler: str, default 'native'
Type of Profiler to use (native/python)
int64_tensor: str, default 'off'
Input tensor size to use for tests (if on, dimensions >= 2**32)
warmup: int, default 25
Number of times to run for warmup
runs: int, default 100
@@ -119,5 +127,5 @@ def run_mx_binary_element_wise_operators_benchmarks(ctx=mx.cpu(), dtype='float32
# Fetch all Binary Element_wise Operators
mx_binary_element_wise_ops = get_all_elemen_wise_binary_operators()
# Run benchmarks
mx_binary_op_results = run_op_benchmarks(mx_binary_element_wise_ops, dtype, ctx, profiler, warmup, runs)
mx_binary_op_results = run_op_benchmarks(mx_binary_element_wise_ops, dtype, ctx, profiler, int64_tensor, warmup, runs)
return mx_binary_op_results
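Since all three binary benchmark runners now share the same signature, a driver can thread `int64_tensor` through them uniformly (an illustrative sketch, not code from the PR; it assumes the same importable `benchmark` package as above):

```python
import mxnet as mx
from benchmark.opperf.nd_operations.binary_operators import (
    run_mx_binary_misc_operators_benchmarks,
    run_mx_binary_broadcast_operators_benchmarks,
    run_mx_binary_element_wise_operators_benchmarks,
)

runners = [
    run_mx_binary_misc_operators_benchmarks,
    run_mx_binary_broadcast_operators_benchmarks,
    run_mx_binary_element_wise_operators_benchmarks,
]

# Run each family of binary operators with large-tensor inputs enabled.
all_results = [
    run(ctx=mx.cpu(), dtype="float32", profiler="native",
        int64_tensor="on", warmup=25, runs=100)
    for run in runners
]
```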