Update unix gpu toolchain (apache#18186)

* update nvidia-docker command & remove cuda compat
* replace cu101 with cuda since compat is no longer used
* skip flaky tests
* get rid of ubuntu_build_cuda and point ubuntu_cu101 to base gpu instead of cuda compat
* Revert "skip flaky tests". This reverts commit 1c720fa.
* revert removal of ubuntu_build_cuda
* add linux gpu g4 node to all steps using g3 in unix-gpu pipeline
AntiZpvoh · Jul 6, 2020 · a10d7a5 · a10d7a5
1 parent 7b0edf3
commit a10d7a5
Show file tree

Hide file tree

Showing 6 changed files with 17 additions and 21 deletions.
diff --git a/ci/Jenkinsfile_utils.groovy b/ci/Jenkinsfile_utils.groovy
@@ -257,6 +257,7 @@ def assign_node_labels(args) {
   //    knowing about the limitations.
   NODE_LINUX_CPU = args.linux_cpu
   NODE_LINUX_GPU = args.linux_gpu
+  NODE_LINUX_GPU_G4 = args.linux_gpu_g4
   NODE_LINUX_GPU_P3 = args.linux_gpu_p3
   NODE_WINDOWS_CPU = args.windows_cpu
   NODE_WINDOWS_GPU = args.windows_gpu

diff --git a/ci/build.py b/ci/build.py
@@ -227,8 +227,9 @@ def container_run(docker_client: SafeDockerClient,
 
     # Equivalent command
     docker_cmd_list = [
-        "nvidia-docker" if nvidia_runtime else "docker",
+        "docker",
         'run',
+        "--gpus all" if nvidia_runtime else "",
         "--cap-add",
         "SYS_PTRACE", # Required by ASAN
         '--rm',

diff --git a/ci/docker/Dockerfile.build.ubuntu b/ci/docker/Dockerfile.build.ubuntu
@@ -164,9 +164,3 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
     apt install -y  --no-install-recommends \
         cuda-10-1 && \
     rm -rf /var/lib/apt/lists/*
-
-
-FROM gpu as gpuwithcompatenv
-# TVMOP requires /usr/local/cuda/compat is no LD_LIBRARY_PATH.
-# This should be fixed and deleted.
-ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/compat
diff --git a/ci/docker/docker-compose.yml b/ci/docker/docker-compose.yml
@@ -103,7 +103,7 @@ services:
     build:
       context: .
       dockerfile: Dockerfile.build.ubuntu
-      target: gpuwithcompatenv
+      target: gpu
       args:
         BASE_IMAGE: nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
       cache_from:

diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy
@@ -151,7 +151,7 @@ def compile_unix_int64_cpu(lib_name) {
 
 def compile_unix_int64_gpu(lib_name) {
     return ['GPU: USE_INT64_TENSOR_SIZE': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/build-gpu-int64') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
@@ -816,7 +816,7 @@ def test_unix_python3_mkl_cpu(lib_name) {
 
 def test_unix_python3_gpu(lib_name) {
     return ['Python3: GPU': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-python3-gpu') {
           try {
             utils.unpack_and_init(lib_name, mx_lib_cython)
@@ -916,7 +916,7 @@ def test_unix_python3_mkldnn_mkl_cpu(lib_name) {
 
 def test_unix_python3_mkldnn_gpu(lib_name) {
     return ['Python3: MKLDNN-GPU': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-python3-mkldnn-gpu') {
           try {
             utils.unpack_and_init(lib_name, mx_mkldnn_lib)
@@ -932,7 +932,7 @@ def test_unix_python3_mkldnn_gpu(lib_name) {
 
 def test_unix_python3_mkldnn_nocudnn_gpu(lib_name) {
     return ['Python3: MKLDNN-GPU-NOCUDNN': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-python3-mkldnn-gpu-nocudnn') {
           try {
             utils.unpack_and_init(lib_name, mx_mkldnn_lib)
@@ -966,7 +966,7 @@ def test_unix_python3_tensorrt_gpu(lib_name) {
 
 def test_unix_python3_integration_gpu(lib_name) {
     return ['Python Integration GPU': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/it-python-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init(lib_name, mx_lib)
@@ -980,7 +980,7 @@ def test_unix_python3_integration_gpu(lib_name) {
 
 def test_unix_cpp_package_gpu(lib_name) {
     return ['cpp-package GPU Makefile': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/it-cpp-package') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init(lib_name, mx_lib_cpp_examples_make)
@@ -994,7 +994,7 @@ def test_unix_cpp_package_gpu(lib_name) {
 
 def test_unix_capi_cpp_package(lib_name) {
     return ['capi-cpp-package GPU Makefile': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/it-capi-cpp-package') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init(lib_name, mx_lib_cpp_capi_make)
@@ -1036,7 +1036,7 @@ def test_unix_scala_mkldnn_cpu(lib_name){
 
 def test_unix_scala_gpu(lib_name) {
     return ['Scala: GPU Makefile': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-scala-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init(lib_name, mx_lib_make)
@@ -1119,7 +1119,7 @@ def test_unix_perl_cpu(lib_name) {
 
 def test_unix_cpp_gpu(lib_name) {
     return ['Cpp: GPU': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-cpp-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init(lib_name, mx_cmake_lib)
@@ -1147,7 +1147,7 @@ def test_unix_cpp_cpu(lib_name) {
 
 def test_unix_perl_gpu(lib_name) {
     return ['Perl: GPU Makefile': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-perl-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init(lib_name, mx_lib_make)
@@ -1161,7 +1161,7 @@ def test_unix_perl_gpu(lib_name) {
 
 def test_unix_r_gpu(lib_name) {
     return ['R: GPU': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/ut-r-gpu') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init(lib_name, mx_lib)
@@ -1229,7 +1229,7 @@ def test_unix_distributed_kvstore_cpu(lib_name) {
 
 def test_unix_distributed_kvstore_gpu(lib_name) {
     return ['dist-kvstore tests GPU': {
-      node(NODE_LINUX_GPU) {
+      node(NODE_LINUX_GPU_G4) {
         ws('workspace/it-dist-kvstore') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.unpack_and_init(lib_name, mx_lib)

diff --git a/ci/jenkins/Jenkinsfile_unix_gpu b/ci/jenkins/Jenkinsfile_unix_gpu
@@ -29,7 +29,7 @@ node('utility') {
   utils = load('ci/Jenkinsfile_utils.groovy')
   custom_steps = load('ci/jenkins/Jenkins_steps.groovy')
 }
-utils.assign_node_labels(utility: 'utility', linux_cpu: 'mxnetlinux-cpu', linux_gpu: 'mxnetlinux-gpu', linux_gpu_p3: 'mxnetlinux-gpu-p3')
+utils.assign_node_labels(utility: 'utility', linux_cpu: 'mxnetlinux-cpu', linux_gpu: 'mxnetlinux-gpu', linux_gpu_p3: 'mxnetlinux-gpu-p3', linux_gpu_g4: 'mxnetlinux-gpu-g4')
 
 utils.main_wrapper(
 core_logic: {