Releases: ppwwyyxx/tensorflow-wheels
TensorFlow-nv 1.15.4, Python 3.8, CUDA 11.0, cuDNN 8, AVX2 MKL XLA
Built from master of https://github.com/NVIDIA/tensorflow/ with the following patch:
diff --git i/tensorflow/core/kernels/mkl_concat_op.cc w/tensorflow/core/kernels/mkl_concat_op.cc
index 45f69309d9..7d7e849923 100644
--- i/tensorflow/core/kernels/mkl_concat_op.cc
+++ w/tensorflow/core/kernels/mkl_concat_op.cc
@@ -17,6 +17,7 @@ limitations under the License.
#include <vector>
#include "mkldnn.hpp"
+#define EIGEN_USE_THREADS
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/bounds_check.h"
#include "tensorflow/core/framework/op_kernel.h"
diff --git i/tensorflow/core/kernels/quantization_utils.h w/tensorflow/core/kernels/quantization_utils.h
index 99efa28e2e..071b0383a3 100644
--- i/tensorflow/core/kernels/quantization_utils.h
+++ w/tensorflow/core/kernels/quantization_utils.h
@@ -32,6 +32,7 @@ limitations under the License.
#include <array>
+#define EIGEN_USE_THREADS
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#define GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK
#include "public/gemmlowp.h"
diff --git i/tensorflow/core/nccl/nccl_manager.cc w/tensorflow/core/nccl/nccl_manager.cc
index 6b9fc10b87..665333a3ea 100644
--- i/tensorflow/core/nccl/nccl_manager.cc
+++ w/tensorflow/core/nccl/nccl_manager.cc
@@ -588,7 +588,7 @@ void NcclManager::RunCollective(Collective* collective) {
// Wait to ensure that the kernel that produces the data in the input
// tensor has finished running before the nccl kernel runs on the
// communication stream.
- nccl_stream->stream->ThenWaitFor(p->input_event.get());
+ nccl_stream->stream->ThenWaitFor(p->tensor_stream);
}
if (p->root) {
if (collective->root_rank == -1) {
diff --git i/tensorflow/python/lib/core/bfloat16.cc w/tensorflow/python/lib/core/bfloat16.cc
index 54be76375c..21c4c55ef7 100644
--- i/tensorflow/python/lib/core/bfloat16.cc
+++ w/tensorflow/python/lib/core/bfloat16.cc
@@ -630,26 +630,26 @@ bool Initialize() {
const std::array<int, 3> compare_types = {
{npy_bfloat16_, npy_bfloat16_, NPY_BOOL}};
- if (!register_ufunc("equal", CompareUFunc<Bfloat16EqFunctor>,
+ if (!register_ufunc("equal", (PyUFuncGenericFunction) CompareUFunc<Bfloat16EqFunctor>,
compare_types)) {
return false;
}
- if (!register_ufunc("not_equal", CompareUFunc<Bfloat16NeFunctor>,
+ if (!register_ufunc("not_equal", (PyUFuncGenericFunction) CompareUFunc<Bfloat16NeFunctor>,
compare_types)) {
return false;
}
- if (!register_ufunc("less", CompareUFunc<Bfloat16LtFunctor>, compare_types)) {
+ if (!register_ufunc("less", (PyUFuncGenericFunction) CompareUFunc<Bfloat16LtFunctor>, compare_types)) {
return false;
}
- if (!register_ufunc("greater", CompareUFunc<Bfloat16GtFunctor>,
+ if (!register_ufunc("greater", (PyUFuncGenericFunction) CompareUFunc<Bfloat16GtFunctor>,
compare_types)) {
return false;
}
- if (!register_ufunc("less_equal", CompareUFunc<Bfloat16LeFunctor>,
+ if (!register_ufunc("less_equal", (PyUFuncGenericFunction) CompareUFunc<Bfloat16LeFunctor>,
compare_types)) {
return false;
}
- if (!register_ufunc("greater_equal", CompareUFunc<Bfloat16GeFunctor>,
+ if (!register_ufunc("greater_equal", (PyUFuncGenericFunction) CompareUFunc<Bfloat16GeFunctor>,
compare_types)) {
return false;
}
diff --git i/third_party/gpus/cuda_configure.bzl w/third_party/gpus/cuda_configure.bzl
index 1bd7141a37..4a4fd8e4c7 100644
--- i/third_party/gpus/cuda_configure.bzl
+++ w/third_party/gpus/cuda_configure.bzl
@@ -1148,7 +1148,7 @@ def _create_local_cuda_repository(repository_ctx):
out_dir = "cuda/bin",
))
- if [int(x) for x in cuda_config.cudnn_version.split(".")] < [8, 0]:
+ if False and [int(x) for x in cuda_config.cudnn_version.split(".")] < [8, 0]:
cudnn_headers = ["cudnn.h"]
else:
cudnn_headers = ["cudnn_adv_infer.h",
@@ -1157,8 +1157,9 @@ def _create_local_cuda_repository(repository_ctx):
"cudnn_cnn_train.h",
"cudnn_ops_infer.h",
"cudnn_ops_train.h",
- "cudnn.h",
+ "cudnn.h",
"cudnn_version.h",
+ "cudnn_backend.h",
]
cudnn_srcs = []
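For reference, a quick way to confirm from Python that the bfloat16 comparison ufuncs touched by this patch still behave as expected (a minimal sketch, not part of the release; it assumes only the public tf.bfloat16 dtype alias):

import numpy as np
import tensorflow as tf

bf16 = tf.bfloat16.as_numpy_dtype           # NumPy dtype registered by bfloat16.cc
a = np.array([1.0, 2.0, 3.0], dtype=bf16)
b = np.array([1.0, 2.5, 2.0], dtype=bf16)
print(a == b)    # "equal" ufunc registered in Initialize()
print(a < b)     # "less"
print(a >= b)    # "greater_equal"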
Bazel flags:
--config=opt --config=cuda --config=mkl --config=xla --config=nohdfs --config=noaws --config=nogcp --config=numa
CUDA compute capability: 6.0, 6.1, 7.0, 7.5
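A hypothetical post-install sanity check for this wheel (assumed usage, TF 1.x APIs only):

import tensorflow as tf

print(tf.__version__)                             # 1.15.4-based version string
print(tf.test.is_built_with_cuda())               # True for a --config=cuda build
print(tf.test.is_gpu_available(cuda_only=True))   # True if a supported GPU is visible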
TensorFlow 2.3 (v1 compat), Python 3.8, CUDA 10.2, cuDNN 7, AVX2 MKL XLA
Built from TensorFlow 2.3 in v1-compatible mode. The following Bazel flags are used:
--config=v1 --config=xla --config=nohdfs --config=noaws --config=nogcp --config=numa --config=mkl --config=cuda --config=opt
CUDA compute capability: 6.0, 6.1, 7.0
tensorflow/tensorflow#31481 was reverted to work around tensorflow/tensorflow#41539
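A minimal sketch of driving this wheel through the TF1-style graph/session API (assumed usage; the tf.compat.v1 route works on any 2.x build, with or without --config=v1):

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()                      # graph mode, sessions, placeholders

x = tf.placeholder(tf.float32, shape=[None, 3])
y = tf.reduce_sum(x, axis=1)
with tf.Session() as sess:
    print(sess.run(y, feed_dict={x: [[1.0, 2.0, 3.0]]}))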
TensorFlow 2.3 (v1 compat), Python 3.8, CUDA 11.0, cuDNN 8, AVX2 MKL XLA
Built from TensorFlow 2.3 in v1-compatible mode. The following Bazel flags are used:
--config=v1 --config=xla --config=nohdfs --config=noaws --config=nogcp --config=numa --config=mkl --config=cuda --config=opt
CUDA compute capability: 6.0, 6.1, 7.0, 7.5
tensorflow/tensorflow#31481 was reverted to work around tensorflow/tensorflow#41539
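To double-check which CUDA/cuDNN a 2.3 wheel was compiled against, the build-info API added in TF 2.3 can be used (a hedged sketch; the key names in the returned dict vary between releases, so it just prints everything):

import tensorflow as tf
print(tf.sysconfig.get_build_info())   # should report a CUDA 11.0 / cuDNN 8 build for this wheel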
TensorFlow 2.2 (v1 compat), Python 3.7, CUDA 10.1 AVX2 MKL XLA
Built from TensorFlow 2.2 in v1-compatible mode. The following Bazel flags are used:
--config=opt --config=cuda --config=mkl --config=xla --config=nohdfs --config=noaws --config=nogcp --config=numa --config=v1
CUDA compute capability: 5.0, 6.0, 7.0
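These wheels only ship kernels for the compute capabilities listed above; a hypothetical way to see what your local GPU reports (device_lib is an internal but long-standing module):

from tensorflow.python.client import device_lib

for d in device_lib.list_local_devices():
    if d.device_type == "GPU":
        print(d.physical_device_desc)   # includes "compute capability: X.Y"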
TensorFlow 1.15.3, Python 3.7, CUDA 10.1 AVX2 MKL XLA
Bazel flags:
--config=opt --config=cuda --config=mkl --config=xla --config=nohdfs --config=noaws --config=nogcp --config=numa
CUDA compute capability: 5.0, 6.0, 7.0
tensorflow/tensorflow#31481 was reverted to work around tensorflow/tensorflow#41539
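--config=xla only compiles XLA support into these wheels; auto-clustering still has to be switched on at runtime. A sketch for the TF 1.x session API (assumed usage):

import tensorflow as tf

config = tf.ConfigProto()
config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1

with tf.Session(config=config) as sess:
    a = tf.random.normal([1024, 1024])
    b = tf.random.normal([1024, 1024])
    print(sess.run(tf.reduce_sum(tf.matmul(a, b))))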
TensorFlow 1.15.2, Python 3.7, CUDA 10.2 AVX2 MKL XLA
v0.4 Create tensorflow-1.15-cuda10.2-python3.8.diff
TensorFlow 1.15.2, Python 3.7, CUDA 9.2 AVX2 MKL XLA
v0.3 Create tensorflow-1.15-cuda10.2-python3.8.diff
TensorFlow 1.15, Python 3.8, CUDA 10.2 AVX2 MKL XLA
TensorFlow 1.14, Python 3.7, CUDA 10.1 AVX2 MKL XLA
Bazel flags:
--config=opt --config=cuda --config=mkl --config=xla --config=nohdfs --config=noaws --config=nogcp --config=numa
CUDA compute capability: 5.0, 6.0, 7.0
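All of the wheels above are MKL (oneDNN) builds; a hedged way to confirm the MKL code paths are compiled in (the helper lives in an internal module and may not exist in every version, hence the guard):

try:
    from tensorflow.python.framework import test_util
    print("MKL enabled:", test_util.IsMklEnabled())
except (ImportError, AttributeError):
    print("IsMklEnabled helper not available in this build")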