From d5b9ec5eb4c9c2dc0d2240cb8b60cf11ef2e5b16 Mon Sep 17 00:00:00 2001
From: Daniël de Kok
Date: Fri, 13 May 2022 11:02:24 +0200
Subject: [PATCH 1/3] Fix out-of-bounds writes in NumpyOps/CupyOps

- Using `{CupyOps,NumpyOps}.adam` with incompatible shapes for weights,
  gradients, or moments resulted in out-of-bounds writes.
- Using `NumpyOps.adam` with non-float32 arrays resulted in filling arrays
  with incorrect data.
---
 thinc/backends/cupy_ops.py       | 10 ++++++++++
 thinc/backends/numpy_ops.pyx     |  9 +++++++--
 thinc/backends/ops.py            | 12 ++++++++++++
 thinc/tests/backends/test_ops.py | 16 ++++++++++++++++
 4 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/thinc/backends/cupy_ops.py b/thinc/backends/cupy_ops.py
index ec70cf72e..f30df15c0 100644
--- a/thinc/backends/cupy_ops.py
+++ b/thinc/backends/cupy_ops.py
@@ -290,6 +290,10 @@ def scatter_add(self, table, indices, values):
     def adam(
         self, weights, gradient, mom1, mom2, beta1, beta2, eps, learn_rate, mod_rate=1.0
     ):
+        _check_compatible_shape(weights, gradient)
+        _check_compatible_shape(weights, mom1)
+        _check_compatible_shape(weights, mom2)
+
         adam_kernel(
             gradient, learn_rate, 1 - beta1, 1 - beta2, eps, weights, mom1, mom2
         )
@@ -312,3 +316,9 @@ def position_encode(self, N, D, period=10000, out=None):
     )
 else:
     adam_kernel = None
+
+
+def _check_compatible_shape(u, v):
+    if u.shape != v.shape:
+        msg = f"arrays have incompatible shapes: {u.shape} and {v.shape}"
+        raise ValueError(msg)
diff --git a/thinc/backends/numpy_ops.pyx b/thinc/backends/numpy_ops.pyx
index 2b618bdd4..63e9005df 100644
--- a/thinc/backends/numpy_ops.pyx
+++ b/thinc/backends/numpy_ops.pyx
@@ -459,9 +459,14 @@ class NumpyOps(Ops):
 
     @cython.boundscheck(False)
     @cython.wraparound(False)
-    def adam(self, np.ndarray weights, np.ndarray gradient, np.ndarray mom1,
-             np.ndarray mom2, const float beta1, const float beta2, float eps,
+    def adam(self, np.ndarray[np.float32_t] weights, np.ndarray[np.float32_t] gradient,
+             np.ndarray[np.float32_t] mom1, np.ndarray[np.float32_t] mom2,
+             const float beta1, const float beta2, float eps,
              float learn_rate, float mod_rate=1.):
+        _check_compatible_shape(weights, gradient)
+        _check_compatible_shape(weights, mom1)
+        _check_compatible_shape(weights, mom2)
+
         _adam_momentum(<float*>gradient.data, <float*>mom1.data, <float*>mom2.data,
             weights.shape[0], beta1, beta2, eps, learn_rate)
         VecVec.add_i(<float*>weights.data,
diff --git a/thinc/backends/ops.py b/thinc/backends/ops.py
index 4179b4842..e1f63dbbb 100644
--- a/thinc/backends/ops.py
+++ b/thinc/backends/ops.py
@@ -1112,13 +1112,19 @@ def adam(
         learn_rate: float,
         mod_rate: float = 1.0,
     ) -> Tuple[Floats1d, Floats1d, Floats1d, Floats1d]:
+        _check_compatible_shape(weights, gradient)
+        _check_compatible_shape(weights, mom1)
+        _check_compatible_shape(weights, mom2)
+
         # Internals for optimizer
         mom1 *= beta1
         mom2 *= beta2
+        print(mom1.shape, mom2.shape, gradient.shape, weights.shape)
         mom1 += gradient * (1.0 - beta1)
         mom2 += gradient * gradient * (1.0 - beta2)
         # Here we assume learn rate is calculated by the caller.
         # cdef weight_t a_t = learn_rate * sqrt(1-beta2**hp.t) / (1-beta1**hp.t);
+        print(mom1.shape, mom2.shape, weights.shape)
         weights -= learn_rate * (mom1 / (mod_rate * self.xp.sqrt(mom2) + eps))
         return weights, gradient, mom1, mom2
 
@@ -1570,3 +1576,9 @@ def gaussian_cdf(ops: Ops, X: FloatsType) -> FloatsType:
 def gaussian_pdf(ops: Ops, X: FloatsType) -> FloatsType:
     """Gaussian PDF for distribution with mean 0 and stdev 1."""
     return INV_SQRT_2PI * ops.xp.exp(-0.5 * X * X)
+
+
+def _check_compatible_shape(u: FloatsXd, v: FloatsXd):
+    if u.shape != v.shape:
+        msg = f"arrays have incompatible shapes: {u.shape} and {v.shape}"
+        raise ValueError(msg)
diff --git a/thinc/tests/backends/test_ops.py b/thinc/tests/backends/test_ops.py
index cdc319b91..e095142b1 100644
--- a/thinc/tests/backends/test_ops.py
+++ b/thinc/tests/backends/test_ops.py
@@ -127,6 +127,22 @@ def test_ops_consistency(op):
         assert str(p1) == str(p2), attr
 
 
+@pytest.mark.parametrize("ops", ALL_OPS)
+def test_adam_incorrect_inputs(ops):
+    one = ops.xp.zeros(1, dtype="f")
+    two = ops.xp.zeros(2, dtype="f")
+
+    ops.adam(one, one, one, one, 0.0, 0.0, 0.0, 0.0)
+    with pytest.raises(ValueError):
+        ops.adam(two, one, one, one, 0.0, 0.0, 0.0, 0.0)
+    with pytest.raises(ValueError):
+        ops.adam(one, two, one, one, 0.0, 0.0, 0.0, 0.0)
+    with pytest.raises(ValueError):
+        ops.adam(one, one, two, one, 0.0, 0.0, 0.0, 0.0)
+    with pytest.raises(ValueError):
+        ops.adam(one, one, one, two, 0.0, 0.0, 0.0, 0.0)
+
+
 @pytest.mark.parametrize("ops", ALL_OPS)
 def test_alloc(ops):
     float_methods = (ops.alloc1f, ops.alloc2f, ops.alloc3f, ops.alloc4f)

From 6f8c7e46fdca6f5e56b0136d03de2776435fa944 Mon Sep 17 00:00:00 2001
From: Daniël de Kok
Date: Fri, 13 May 2022 11:26:14 +0200
Subject: [PATCH 2/3] Remove print debugging remnants

Co-authored-by: Adriane Boyd
---
 thinc/backends/ops.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/thinc/backends/ops.py b/thinc/backends/ops.py
index e1f63dbbb..503f4da87 100644
--- a/thinc/backends/ops.py
+++ b/thinc/backends/ops.py
@@ -1119,7 +1119,6 @@ def adam(
         # Internals for optimizer
         mom1 *= beta1
         mom2 *= beta2
-        print(mom1.shape, mom2.shape, gradient.shape, weights.shape)
         mom1 += gradient * (1.0 - beta1)
         mom2 += gradient * gradient * (1.0 - beta2)
         # Here we assume learn rate is calculated by the caller.

From f32a70c38b1eb4ffec925de4cbc95c707b7f9c70 Mon Sep 17 00:00:00 2001
From: Daniël de Kok
Date: Fri, 13 May 2022 11:26:41 +0200
Subject: [PATCH 3/3] More print debugging remnants

Co-authored-by: Adriane Boyd
---
 thinc/backends/ops.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/thinc/backends/ops.py b/thinc/backends/ops.py
index 503f4da87..e5a232d9b 100644
--- a/thinc/backends/ops.py
+++ b/thinc/backends/ops.py
@@ -1123,7 +1123,6 @@ def adam(
         mom2 += gradient * gradient * (1.0 - beta2)
         # Here we assume learn rate is calculated by the caller.
         # cdef weight_t a_t = learn_rate * sqrt(1-beta2**hp.t) / (1-beta1**hp.t);
-        print(mom1.shape, mom2.shape, weights.shape)
         weights -= learn_rate * (mom1 / (mod_rate * self.xp.sqrt(mom2) + eps))
         return weights, gradient, mom1, mom2
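
Reviewer note, not part of the patches above: a minimal sketch of how the new guard surfaces the bug at the call site. It assumes a build of thinc that includes PATCH 1/3; the array shapes and hyperparameter values are made up for illustration.

import numpy
from thinc.api import NumpyOps

ops = NumpyOps()

weights = numpy.zeros(4, dtype="float32")
gradient = numpy.zeros(4, dtype="float32")
mom1 = numpy.zeros(4, dtype="float32")
mom2 = numpy.zeros(2, dtype="float32")  # deliberately mismatched shape

try:
    # Before the fix this wrote past the end of `mom2`; with the shape
    # guard from PATCH 1/3 it raises a ValueError before any update is applied.
    ops.adam(weights, gradient, mom1, mom2, 0.9, 0.999, 1e-8, 0.001)
except ValueError as err:
    print(err)  # arrays have incompatible shapes: (4,) and (2,)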