From d5b9ec5eb4c9c2dc0d2240cb8b60cf11ef2e5b16 Mon Sep 17 00:00:00 2001
From: Daniël de Kok
Date: Fri, 13 May 2022 11:02:24 +0200
Subject: [PATCH 1/3] Fix out-of-bounds writes in NumpyOps/CupyOps

- Using `{CupyOps,NumpyOps}.adam` with incompatible shapes for weights,
  gradients, or moments resulted in out-of-bounds writes.
- Using `NumpyOps.adam` with non-float32 arrays resulted in filling arrays
  with incorrect data.
---
 thinc/backends/cupy_ops.py       | 10 ++++++++++
 thinc/backends/numpy_ops.pyx     |  9 +++++++--
 thinc/backends/ops.py            | 12 ++++++++++++
 thinc/tests/backends/test_ops.py | 16 ++++++++++++++++
 4 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/thinc/backends/cupy_ops.py b/thinc/backends/cupy_ops.py
index ec70cf72e..f30df15c0 100644
--- a/thinc/backends/cupy_ops.py
+++ b/thinc/backends/cupy_ops.py
@@ -290,6 +290,10 @@ def scatter_add(self, table, indices, values):
     def adam(
         self, weights, gradient, mom1, mom2, beta1, beta2, eps, learn_rate, mod_rate=1.0
     ):
+        _check_compatible_shape(weights, gradient)
+        _check_compatible_shape(weights, mom1)
+        _check_compatible_shape(weights, mom2)
+
         adam_kernel(
             gradient, learn_rate, 1 - beta1, 1 - beta2, eps, weights, mom1, mom2
         )
@@ -312,3 +316,9 @@ def position_encode(self, N, D, period=10000, out=None):
     )
 else:
     adam_kernel = None
+
+
+def _check_compatible_shape(u, v):
+    if u.shape != v.shape:
+        msg = f"arrays have incompatible shapes: {u.shape} and {v.shape}"
+        raise ValueError(msg)
diff --git a/thinc/backends/numpy_ops.pyx b/thinc/backends/numpy_ops.pyx
index 2b618bdd4..63e9005df 100644
--- a/thinc/backends/numpy_ops.pyx
+++ b/thinc/backends/numpy_ops.pyx
@@ -459,9 +459,14 @@ class NumpyOps(Ops):
 
     @cython.boundscheck(False)
     @cython.wraparound(False)
-    def adam(self, np.ndarray weights, np.ndarray gradient, np.ndarray mom1,
-             np.ndarray mom2, const float beta1, const float beta2, float eps,
+    def adam(self, np.ndarray[np.float32_t] weights, np.ndarray[np.float32_t] gradient,
+             np.ndarray[np.float32_t] mom1, np.ndarray[np.float32_t] mom2,
+             const float beta1, const float beta2, float eps,
              float learn_rate, float mod_rate=1.):
+        _check_compatible_shape(weights, gradient)
+        _check_compatible_shape(weights, mom1)
+        _check_compatible_shape(weights, mom2)
+
         _adam_momentum(<float*>gradient.data, <float*>mom1.data, <float*>mom2.data,
             weights.shape[0], beta1, beta2, eps, learn_rate)
         VecVec.add_i(<float*>weights.data,
diff --git a/thinc/backends/ops.py b/thinc/backends/ops.py
index 4179b4842..e1f63dbbb 100644
--- a/thinc/backends/ops.py
+++ b/thinc/backends/ops.py
@@ -1112,13 +1112,19 @@ def adam(
         learn_rate: float,
         mod_rate: float = 1.0,
     ) -> Tuple[Floats1d, Floats1d, Floats1d, Floats1d]:
+        _check_compatible_shape(weights, gradient)
+        _check_compatible_shape(weights, mom1)
+        _check_compatible_shape(weights, mom2)
+
         # Internals for optimizer
         mom1 *= beta1
         mom2 *= beta2
+        print(mom1.shape, mom2.shape, gradient.shape, weights.shape)
         mom1 += gradient * (1.0 - beta1)
         mom2 += gradient * gradient * (1.0 - beta2)
         # Here we assume learn rate is calculated by the caller.
         # cdef weight_t a_t = learn_rate * sqrt(1-beta2**hp.t) / (1-beta1**hp.t);
+        print(mom1.shape, mom2.shape, weights.shape)
         weights -= learn_rate * (mom1 / (mod_rate * self.xp.sqrt(mom2) + eps))
         return weights, gradient, mom1, mom2
 
@@ -1570,3 +1576,9 @@ def gaussian_cdf(ops: Ops, X: FloatsType) -> FloatsType:
 def gaussian_pdf(ops: Ops, X: FloatsType) -> FloatsType:
     """Gaussian PDF for distribution with mean 0 and stdev 1."""
     return INV_SQRT_2PI * ops.xp.exp(-0.5 * X * X)
+
+
+def _check_compatible_shape(u: FloatsXd, v: FloatsXd):
+    if u.shape != v.shape:
+        msg = f"arrays have incompatible shapes: {u.shape} and {v.shape}"
+        raise ValueError(msg)
diff --git a/thinc/tests/backends/test_ops.py b/thinc/tests/backends/test_ops.py
index cdc319b91..e095142b1 100644
--- a/thinc/tests/backends/test_ops.py
+++ b/thinc/tests/backends/test_ops.py
@@ -127,6 +127,22 @@ def test_ops_consistency(op):
         assert str(p1) == str(p2), attr
 
 
+@pytest.mark.parametrize("ops", ALL_OPS)
+def test_adam_incorrect_inputs(ops):
+    one = ops.xp.zeros(1, dtype="f")
+    two = ops.xp.zeros(2, dtype="f")
+
+    ops.adam(one, one, one, one, 0.0, 0.0, 0.0, 0.0)
+    with pytest.raises(ValueError):
+        ops.adam(two, one, one, one, 0.0, 0.0, 0.0, 0.0)
+    with pytest.raises(ValueError):
+        ops.adam(one, two, one, one, 0.0, 0.0, 0.0, 0.0)
+    with pytest.raises(ValueError):
+        ops.adam(one, one, two, one, 0.0, 0.0, 0.0, 0.0)
+    with pytest.raises(ValueError):
+        ops.adam(one, one, one, two, 0.0, 0.0, 0.0, 0.0)
+
+
 @pytest.mark.parametrize("ops", ALL_OPS)
 def test_alloc(ops):
     float_methods = (ops.alloc1f, ops.alloc2f, ops.alloc3f, ops.alloc4f)

From 6f8c7e46fdca6f5e56b0136d03de2776435fa944 Mon Sep 17 00:00:00 2001
From: Daniël de Kok
Date: Fri, 13 May 2022 11:26:14 +0200
Subject: [PATCH 2/3] Remove print debugging remnants

Co-authored-by: Adriane Boyd
---
 thinc/backends/ops.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/thinc/backends/ops.py b/thinc/backends/ops.py
index e1f63dbbb..503f4da87 100644
--- a/thinc/backends/ops.py
+++ b/thinc/backends/ops.py
@@ -1119,7 +1119,6 @@ def adam(
         # Internals for optimizer
         mom1 *= beta1
         mom2 *= beta2
-        print(mom1.shape, mom2.shape, gradient.shape, weights.shape)
         mom1 += gradient * (1.0 - beta1)
         mom2 += gradient * gradient * (1.0 - beta2)
         # Here we assume learn rate is calculated by the caller.

From f32a70c38b1eb4ffec925de4cbc95c707b7f9c70 Mon Sep 17 00:00:00 2001
From: Daniël de Kok
Date: Fri, 13 May 2022 11:26:41 +0200
Subject: [PATCH 3/3] More print debugging remnants

Co-authored-by: Adriane Boyd
---
 thinc/backends/ops.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/thinc/backends/ops.py b/thinc/backends/ops.py
index 503f4da87..e5a232d9b 100644
--- a/thinc/backends/ops.py
+++ b/thinc/backends/ops.py
@@ -1123,7 +1123,6 @@ def adam(
         mom2 += gradient * gradient * (1.0 - beta2)
         # Here we assume learn rate is calculated by the caller.
         # cdef weight_t a_t = learn_rate * sqrt(1-beta2**hp.t) / (1-beta1**hp.t);
-        print(mom1.shape, mom2.shape, weights.shape)
         weights -= learn_rate * (mom1 / (mod_rate * self.xp.sqrt(mom2) + eps))
         return weights, gradient, mom1, mom2
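
Reviewer note, not part of the patches above: a minimal sketch of how the new guard surfaces the bug at the call site. It assumes a build of thinc that includes PATCH 1/3; the array shapes and hyperparameter values are made up for illustration.

import numpy
from thinc.api import NumpyOps

ops = NumpyOps()

weights = numpy.zeros(4, dtype="float32")
gradient = numpy.zeros(4, dtype="float32")
mom1 = numpy.zeros(4, dtype="float32")
mom2 = numpy.zeros(2, dtype="float32")  # deliberately mismatched shape

try:
    # Before the fix this wrote past the end of `mom2`; with the shape
    # guard from PATCH 1/3 it raises a ValueError before any update is applied.
    ops.adam(weights, gradient, mom1, mom2, 0.9, 0.999, 1e-8, 0.001)
except ValueError as err:
    print(err)  # arrays have incompatible shapes: (4,) and (2,)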