Releases: ppwwyyxx/tensorflow-wheels
TensorFlow-nv 1.15.4, Python 3.8, CUDA 11.0, cuDNN 8, AVX2 MKL XLA
Built from master of https://github.com/NVIDIA/tensorflow/ with the following patch:
diff --git i/tensorflow/core/kernels/mkl_concat_op.cc w/tensorflow/core/kernels/mkl_concat_op.cc
index 45f69309d9..7d7e849923 100644
--- i/tensorflow/core/kernels/mkl_concat_op.cc
+++ w/tensorflow/core/kernels/mkl_concat_op.cc
@@ -17,6 +17,7 @@ limitations under the License.
#include <vector>
#include "mkldnn.hpp"
+#define EIGEN_USE_THREADS
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/bounds_check.h"
#include "tensorflow/core/framework/op_kernel.h"
diff --git i/tensorflow/core/kernels/quantization_utils.h w/tensorflow/core/kernels/quantization_utils.h
index 99efa28e2e..071b0383a3 100644
--- i/tensorflow/core/kernels/quantization_utils.h
+++ w/tensorflow/core/kernels/quantization_utils.h
@@ -32,6 +32,7 @@ limitations under the License.
#include <array>
+#define EIGEN_USE_THREADS
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#define GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK
#include "public/gemmlowp.h"
diff --git i/tensorflow/core/nccl/nccl_manager.cc w/tensorflow/core/nccl/nccl_manager.cc
index 6b9fc10b87..665333a3ea 100644
--- i/tensorflow/core/nccl/nccl_manager.cc
+++ w/tensorflow/core/nccl/nccl_manager.cc
@@ -588,7 +588,7 @@ void NcclManager::RunCollective(Collective* collective) {
// Wait to ensure that the kernel that produces the data in the input
// tensor has finished running before the nccl kernel runs on the
// communication stream.
- nccl_stream->stream->ThenWaitFor(p->input_event.get());
+ nccl_stream->stream->ThenWaitFor(p->tensor_stream);
}
if (p->root) {
if (collective->root_rank == -1) {
diff --git i/tensorflow/python/lib/core/bfloat16.cc w/tensorflow/python/lib/core/bfloat16.cc
index 54be76375c..21c4c55ef7 100644
--- i/tensorflow/python/lib/core/bfloat16.cc
+++ w/tensorflow/python/lib/core/bfloat16.cc
@@ -630,26 +630,26 @@ bool Initialize() {
const std::array<int, 3> compare_types = {
{npy_bfloat16_, npy_bfloat16_, NPY_BOOL}};
- if (!register_ufunc("equal", CompareUFunc<Bfloat16EqFunctor>,
+ if (!register_ufunc("equal", (PyUFuncGenericFunction) CompareUFunc<Bfloat16EqFunctor>,
compare_types)) {
return false;
}
- if (!register_ufunc("not_equal", CompareUFunc<Bfloat16NeFunctor>,
+ if (!register_ufunc("not_equal", (PyUFuncGenericFunction) CompareUFunc<Bfloat16NeFunctor>,
compare_types)) {
return false;
}
- if (!register_ufunc("less", CompareUFunc<Bfloat16LtFunctor>, compare_types)) {
+ if (!register_ufunc("less", (PyUFuncGenericFunction) CompareUFunc<Bfloat16LtFunctor>, compare_types)) {
return false;
}
- if (!register_ufunc("greater", CompareUFunc<Bfloat16GtFunctor>,
+ if (!register_ufunc("greater", (PyUFuncGenericFunction) CompareUFunc<Bfloat16GtFunctor>,
compare_types)) {
return false;
}
- if (!register_ufunc("less_equal", CompareUFunc<Bfloat16LeFunctor>,
+ if (!register_ufunc("less_equal", (PyUFuncGenericFunction) CompareUFunc<Bfloat16LeFunctor>,
compare_types)) {
return false;
}
- if (!register_ufunc("greater_equal", CompareUFunc<Bfloat16GeFunctor>,
+ if (!register_ufunc("greater_equal", (PyUFuncGenericFunction) CompareUFunc<Bfloat16GeFunctor>,
compare_types)) {
return false;
}
diff --git i/third_party/gpus/cuda_configure.bzl w/third_party/gpus/cuda_configure.bzl
index 1bd7141a37..4a4fd8e4c7 100644
--- i/third_party/gpus/cuda_configure.bzl
+++ w/third_party/gpus/cuda_configure.bzl
@@ -1148,7 +1148,7 @@ def _create_local_cuda_repository(repository_ctx):
out_dir = "cuda/bin",
))
- if [int(x) for x in cuda_config.cudnn_version.split(".")] < [8, 0]:
+ if False and [int(x) for x in cuda_config.cudnn_version.split(".")] < [8, 0]:
cudnn_headers = ["cudnn.h"]
else:
cudnn_headers = ["cudnn_adv_infer.h",
@@ -1157,8 +1157,9 @@ def _create_local_cuda_repository(repository_ctx):
"cudnn_cnn_train.h",
"cudnn_ops_infer.h",
"cudnn_ops_train.h",
- "cudnn.h",
+ "cudnn.h",
"cudnn_version.h",
+ "cudnn_backend.h",
]
cudnn_srcs = []
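For reference, a quick way to confirm from Python that the bfloat16 comparison ufuncs touched by this patch still behave as expected (a minimal sketch, not part of the release; it assumes only the public tf.bfloat16 dtype alias):

import numpy as np
import tensorflow as tf

bf16 = tf.bfloat16.as_numpy_dtype           # NumPy dtype registered by bfloat16.cc
a = np.array([1.0, 2.0, 3.0], dtype=bf16)
b = np.array([1.0, 2.5, 2.0], dtype=bf16)
print(a == b)    # "equal" ufunc registered in Initialize()
print(a < b)     # "less"
print(a >= b)    # "greater_equal"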
Bazel flags:
--config=opt --config=cuda --config=mkl --config=xla --config=nohdfs --config=noaws --config=nogcp --config=numa
CUDA compute capability: 6.0, 6.1, 7.0, 7.5
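A hypothetical post-install sanity check for this wheel (assumed usage, TF 1.x APIs only):

import tensorflow as tf

print(tf.__version__)                             # 1.15.4-based version string
print(tf.test.is_built_with_cuda())               # True for a --config=cuda build
print(tf.test.is_gpu_available(cuda_only=True))   # True if a supported GPU is visible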
TensorFlow 2.3 (v1 compat), Python 3.8, CUDA 10.2, cuDNN 7, AVX2 MKL XLA
Built from TensorFlow 2.3 in v1-compatible mode. The following Bazel flags are used:
--config=v1 --config=xla --config=nohdfs --config=noaws --config=nogcp --config=numa --config=mkl --config=cuda --config=opt
CUDA compute capability: 6.0, 6.1, 7.0
tensorflow/tensorflow#31481 was reverted to work around tensorflow/tensorflow#41539
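A minimal sketch of driving this wheel through the TF1-style graph/session API (assumed usage; the tf.compat.v1 route works on any 2.x build, with or without --config=v1):

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()                      # graph mode, sessions, placeholders

x = tf.placeholder(tf.float32, shape=[None, 3])
y = tf.reduce_sum(x, axis=1)
with tf.Session() as sess:
    print(sess.run(y, feed_dict={x: [[1.0, 2.0, 3.0]]}))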
TensorFlow 2.3 (v1 compat), Python 3.8, CUDA 11.0, cuDNN 8, AVX2 MKL XLA
Built from TensorFlow 2.3 in v1-compatible mode. The following Bazel flags are used:
--config=v1 --config=xla --config=nohdfs --config=noaws --config=nogcp --config=numa --config=mkl --config=cuda --config=opt
CUDA compute capability: 6.0, 6.1, 7.0, 7.5
tensorflow/tensorflow#31481 was reverted to work around tensorflow/tensorflow#41539
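To double-check which CUDA/cuDNN a 2.3 wheel was compiled against, the build-info API added in TF 2.3 can be used (a hedged sketch; the key names in the returned dict vary between releases, so it just prints everything):

import tensorflow as tf
print(tf.sysconfig.get_build_info())   # should report a CUDA 11.0 / cuDNN 8 build for this wheel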
TensorFlow 2.2 (v1 compat), Python 3.7, CUDA 10.1 AVX2 MKL XLA
Built from TensorFlow 2.2 in v1-compatible mode. The following Bazel flags are used:
--config=opt --config=cuda --config=mkl --config=xla --config=nohdfs --config=noaws --config=nogcp --config=numa --config=v1
CUDA compute capability: 5.0, 6.0, 7.0
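These wheels only ship kernels for the compute capabilities listed above; a hypothetical way to see what your local GPU reports (device_lib is an internal but long-standing module):

from tensorflow.python.client import device_lib

for d in device_lib.list_local_devices():
    if d.device_type == "GPU":
        print(d.physical_device_desc)   # includes "compute capability: X.Y"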
TensorFlow 1.15.3, Python 3.7, CUDA 10.1 AVX2 MKL XLA
Bazel flags:
--config=opt --config=cuda --config=mkl --config=xla --config=nohdfs --config=noaws --config=nogcp --config=numa
CUDA compute capability: 5.0, 6.0, 7.0
tensorflow/tensorflow#31481 was reverted to work around tensorflow/tensorflow#41539
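--config=xla only compiles XLA support into these wheels; auto-clustering still has to be switched on at runtime. A sketch for the TF 1.x session API (assumed usage):

import tensorflow as tf

config = tf.ConfigProto()
config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1

with tf.Session(config=config) as sess:
    a = tf.random.normal([1024, 1024])
    b = tf.random.normal([1024, 1024])
    print(sess.run(tf.reduce_sum(tf.matmul(a, b))))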
TensorFlow 1.15.2, Python 3.7, CUDA 10.2 AVX2 MKL XLA
v0.4 Create tensorflow-1.15-cuda10.2-python3.8.diff
TensorFlow 1.15.2, Python 3.7, CUDA 9.2 AVX2 MKL XLA
v0.3 Create tensorflow-1.15-cuda10.2-python3.8.diff
TensorFlow 1.15, Python 3.8, CUDA 10.2 AVX2 MKL XLA
TensorFlow 1.14, Python 3.7, CUDA 10.1 AVX2 MKL XLA
Bazel flags:
--config=opt --config=cuda --config=mkl --config=xla --config=nohdfs --config=noaws --config=nogcp --config=numa
CUDA compute capability: 5.0, 6.0, 7.0
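All of the wheels above are MKL (oneDNN) builds; a hedged way to confirm the MKL code paths are compiled in (the helper lives in an internal module and may not exist in every version, hence the guard):

try:
    from tensorflow.python.framework import test_util
    print("MKL enabled:", test_util.IsMklEnabled())
except (ImportError, AttributeError):
    print("IsMklEnabled helper not available in this build")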