Skip to content

Releases: ppwwyyxx/tensorflow-wheels

TensorFlow-nv 1.15.4, Python 3.8, CUDA 11.0, CuDNN 8, AVX2 MKL XLA

17 Jan 00:33
e401f09
Compare
Choose a tag to compare

Built from master of https://github.com/NVIDIA/tensorflow/ with the following patch:

diff --git i/tensorflow/core/kernels/mkl_concat_op.cc w/tensorflow/core/kernels/mkl_concat_op.cc
index 45f69309d9..7d7e849923 100644
--- i/tensorflow/core/kernels/mkl_concat_op.cc
+++ w/tensorflow/core/kernels/mkl_concat_op.cc
@@ -17,6 +17,7 @@ limitations under the License.
 #include <vector>
 
 #include "mkldnn.hpp"
+#define EIGEN_USE_THREADS
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/bounds_check.h"
 #include "tensorflow/core/framework/op_kernel.h"
diff --git i/tensorflow/core/kernels/quantization_utils.h w/tensorflow/core/kernels/quantization_utils.h
index 99efa28e2e..071b0383a3 100644
--- i/tensorflow/core/kernels/quantization_utils.h
+++ w/tensorflow/core/kernels/quantization_utils.h
@@ -32,6 +32,7 @@ limitations under the License.
 
 #include <array>
 
+#define EIGEN_USE_THREADS
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #define GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK
 #include "public/gemmlowp.h"
diff --git i/tensorflow/core/nccl/nccl_manager.cc w/tensorflow/core/nccl/nccl_manager.cc
index 6b9fc10b87..665333a3ea 100644
--- i/tensorflow/core/nccl/nccl_manager.cc
+++ w/tensorflow/core/nccl/nccl_manager.cc
@@ -588,7 +588,7 @@ void NcclManager::RunCollective(Collective* collective) {
       // Wait to ensure that the kernel that produces the data in the input
       // tensor has finished running before the nccl kernel runs on the
       // communication stream.
-      nccl_stream->stream->ThenWaitFor(p->input_event.get());
+      nccl_stream->stream->ThenWaitFor(p->tensor_stream);
     }
     if (p->root) {
       if (collective->root_rank == -1) {
diff --git i/tensorflow/python/lib/core/bfloat16.cc w/tensorflow/python/lib/core/bfloat16.cc
index 54be76375c..21c4c55ef7 100644
--- i/tensorflow/python/lib/core/bfloat16.cc
+++ w/tensorflow/python/lib/core/bfloat16.cc
@@ -630,26 +630,26 @@ bool Initialize() {
   const std::array<int, 3> compare_types = {
       {npy_bfloat16_, npy_bfloat16_, NPY_BOOL}};
 
-  if (!register_ufunc("equal", CompareUFunc<Bfloat16EqFunctor>,
+  if (!register_ufunc("equal", (PyUFuncGenericFunction) CompareUFunc<Bfloat16EqFunctor>,
                       compare_types)) {
     return false;
   }
-  if (!register_ufunc("not_equal", CompareUFunc<Bfloat16NeFunctor>,
+  if (!register_ufunc("not_equal", (PyUFuncGenericFunction) CompareUFunc<Bfloat16NeFunctor>,
                       compare_types)) {
     return false;
   }
-  if (!register_ufunc("less", CompareUFunc<Bfloat16LtFunctor>, compare_types)) {
+  if (!register_ufunc("less", (PyUFuncGenericFunction) CompareUFunc<Bfloat16LtFunctor>, compare_types)) {
     return false;
   }
-  if (!register_ufunc("greater", CompareUFunc<Bfloat16GtFunctor>,
+  if (!register_ufunc("greater", (PyUFuncGenericFunction) CompareUFunc<Bfloat16GtFunctor>,
                       compare_types)) {
     return false;
   }
-  if (!register_ufunc("less_equal", CompareUFunc<Bfloat16LeFunctor>,
+  if (!register_ufunc("less_equal", (PyUFuncGenericFunction) CompareUFunc<Bfloat16LeFunctor>,
                       compare_types)) {
     return false;
   }
-  if (!register_ufunc("greater_equal", CompareUFunc<Bfloat16GeFunctor>,
+  if (!register_ufunc("greater_equal", (PyUFuncGenericFunction) CompareUFunc<Bfloat16GeFunctor>,
                       compare_types)) {
     return false;
   }
diff --git i/third_party/gpus/cuda_configure.bzl w/third_party/gpus/cuda_configure.bzl
index 1bd7141a37..4a4fd8e4c7 100644
--- i/third_party/gpus/cuda_configure.bzl
+++ w/third_party/gpus/cuda_configure.bzl
@@ -1148,7 +1148,7 @@ def _create_local_cuda_repository(repository_ctx):
         out_dir = "cuda/bin",
     ))
 
-    if [int(x) for x in cuda_config.cudnn_version.split(".")] < [8, 0]:
+    if False and [int(x) for x in cuda_config.cudnn_version.split(".")] < [8, 0]:
       cudnn_headers = ["cudnn.h"]
     else:
       cudnn_headers = ["cudnn_adv_infer.h",
@@ -1157,8 +1157,9 @@ def _create_local_cuda_repository(repository_ctx):
                        "cudnn_cnn_train.h",
                        "cudnn_ops_infer.h",
                        "cudnn_ops_train.h",
-                       "cudnn.h",          
+                       "cudnn.h",
                        "cudnn_version.h",
+                       "cudnn_backend.h",
                       ]
 
     cudnn_srcs = []

Bazel flags:

--config=opt --config=cuda --config=mkl --config=xla --config=nohdfs --config=noaws --config=nogcp  --config=numa

CUDA compute capability: 6.0,6.1,7.0,7.5

TensorFlow 2.3 (v1 compat), Python 3.8, CUDA 10.2, CUDNN 7, AVX2 MKL XLA

02 Sep 10:03
e401f09
Compare
Choose a tag to compare

Built from TensorFlow 2.3 with v1 compatible mode. The following bazel flags are used:

--config=v1 --config=xla --config=nohdfs --config=noaws --config=nogcp  --config=numa --config=mkl --config=cuda --config=opt 

CUDA compute capability: 6.0,6.1,7.0

tensorflow/tensorflow#31481 was reverted to workaround tensorflow/tensorflow#41539

TensorFlow 2.3 (v1 compat), Python 3.8, CUDA 11.0, CUDNN 8, AVX2 MKL XLA

01 Sep 22:44
e401f09
Compare
Choose a tag to compare

Built from TensorFlow 2.3 with v1 compatible mode. The following bazel flags are used:

--config=v1 --config=xla --config=nohdfs --config=noaws --config=nogcp  --config=numa --config=mkl --config=cuda --config=opt 

CUDA compute capability: 6.0,6.1,7.0,7.5

tensorflow/tensorflow#31481 was reverted to workaround tensorflow/tensorflow#41539

TensorFlow 2.2 (v1 compat), Python 3.7, CUDA 10.1 AVX2 MKL XLA

11 Jul 10:43
e401f09
Compare
Choose a tag to compare

Built from TensorFlow 2.2 with v1 compatible mode. The following bazel flags are used:

 --config=opt --config=cuda --config=mkl --config=xla --config=nohdfs --config=noaws --config=nogcp  --config=numa --config=v1 

CUDA compute capability: 5.0, 6.0, 7.0

TensorFlow 1.15.3, Python 3.7, CUDA 10.1 AVX2 MKL XLA

12 Jul 05:45
e401f09
Compare
Choose a tag to compare

Bazel flags:

--config=opt --config=cuda --config=mkl --config=xla --config=nohdfs --config=noaws --config=nogcp  --config=numa

CUDA compute capability: 5.0,6.0,7.0

tensorflow/tensorflow#31481 was reverted to workaround tensorflow/tensorflow#41539

TensorFlow 1.15.2, Python 3.7, CUDA 10.2 AVX2 MKL XLA

21 Mar 10:20
e401f09
Compare
Choose a tag to compare
v0.4

Create tensorflow-1.15-cuda10.2-python3.8.diff

TensorFlow 1.15.2, Python 3.7, CUDA 9.2 AVX2 MKL XLA

18 Mar 07:48
e401f09
Compare
Choose a tag to compare
v0.3

Create tensorflow-1.15-cuda10.2-python3.8.diff

TensorFlow 1.15, Python 3.8, CUDA 10.2 AVX2 MKL XLA

06 Jan 02:54
e401f09
Compare
Choose a tag to compare

TensorFlow 1.14, Python 3.7, CUDA 10.1 AVX2 MKL XLA

12 Jul 07:49
e401f09
Compare
Choose a tag to compare

Bazel flags:

--config=opt --config=cuda --config=mkl --config=xla --config=nohdfs --config=noaws --config=nogcp  --config=numa

CUDA compute capability: 5.0,6.0,7.0

TensorFlow 1.14, Python 3.7, CUDA 9.2 AVX2 MKL XLA

12 Jul 07:51
ae6932b
Compare
Choose a tag to compare