From b3239b1902b9cb26bcaed609ce839de1b838562a Mon Sep 17 00:00:00 2001
From: redwrasse
Date: Thu, 30 Nov 2023 22:13:43 -0800
Subject: [PATCH 1/5] cleanup var_to_var

---
 .../experimental/tf2_utils_2x_wide.py | 33 ++++++++++++-------
 1 file changed, 21 insertions(+), 12 deletions(-)

diff --git a/official/modeling/fast_training/experimental/tf2_utils_2x_wide.py b/official/modeling/fast_training/experimental/tf2_utils_2x_wide.py
index 26355bf3d16..54ea96b5d0b 100644
--- a/official/modeling/fast_training/experimental/tf2_utils_2x_wide.py
+++ b/official/modeling/fast_training/experimental/tf2_utils_2x_wide.py
@@ -109,8 +109,8 @@ def var_to_var(var_from: tf.Variable,
                epsilon: float):
   """Expands a variable to another variable.
 
-  Assume the shape of `var_from` is (a, b, ..., y, z), the shape of `var_to`
-  can be (a, ..., z * 2), (a * 2, ..., z * 2), (a * 2, ..., z)
+  Assuming the shape of `var_from` is (a, b, ..., y, z), the shape of `var_to`
+  must be one of (a, ..., z * 2), (a * 2, ..., z * 2), or (a * 2, ..., z).
 
   If the shape of `var_to` is (a, ..., 2 * z):
     For any x, tf.matmul(x, var_to) ~= expand_vector(tf.matmul(x, var_from)) / 2
@@ -131,21 +131,30 @@ def var_to_var(var_from: tf.Variable,
 
   if shape_from == shape_to:
     var_to.assign(var_from)
+    return
 
-  elif len(shape_from) == 1 and len(shape_to) == 1:
-    var_to.assign(expand_vector(var_from.numpy()))
+  var_from_np = var_from.numpy()
 
-  elif shape_from[0] * 2 == shape_to[0] and shape_from[-1] == shape_to[-1]:
-    var_to.assign(expand_1_axis(var_from.numpy(), epsilon=epsilon, axis=0))
+  if len(shape_from) == len(shape_to) == 1:
+    var_to.assign(expand_vector(var_from_np))
+    return
 
-  elif shape_from[0] == shape_to[0] and shape_from[-1] * 2 == shape_to[-1]:
-    var_to.assign(expand_1_axis(var_from.numpy(), epsilon=epsilon, axis=-1))
+  a_from, z_from = shape_from[0], shape_from[-1]
+  a_to, z_to = shape_to[0], shape_to[-1]
 
-  elif shape_from[0] * 2 == shape_to[0] and shape_from[-1] * 2 == shape_to[-1]:
-    var_to.assign(expand_2_axes(var_from.numpy(), epsilon=epsilon))
+  if a_to == 2 * a_from and z_to == z_from:
+    var_to.assign(expand_1_axis(var_from_np, epsilon=epsilon, axis=0))
+    return
 
-  else:
-    raise ValueError("Shape not supported, {}, {}".format(shape_from, shape_to))
+  if a_to == a_from and z_to == 2 * z_from:
+    var_to.assign(expand_1_axis(var_from_np, epsilon=epsilon, axis=-1))
+    return
+
+  if a_to == 2 * a_from and z_to == 2 * z_from:
+    var_to.assign(expand_2_axes(var_from_np, epsilon=epsilon))
+    return
+
+  raise ValueError("Shape not supported, {}, {}".format(shape_from, shape_to))
 
 
 def model_to_model_2x_wide(model_from: tf.Module,

From 57a6cbea7ac2ef822eeebefe6c64407e5cd4f2de Mon Sep 17 00:00:00 2001
From: redwrasse
Date: Thu, 30 Nov 2023 22:26:49 -0800
Subject: [PATCH 2/5] wip various tf2_utils_2x_wide.py cleanup

---
 .../fast_training/experimental/tf2_utils_2x_wide.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/official/modeling/fast_training/experimental/tf2_utils_2x_wide.py b/official/modeling/fast_training/experimental/tf2_utils_2x_wide.py
index 54ea96b5d0b..6142594bbf0 100644
--- a/official/modeling/fast_training/experimental/tf2_utils_2x_wide.py
+++ b/official/modeling/fast_training/experimental/tf2_utils_2x_wide.py
@@ -36,7 +36,7 @@ def expand_vector(v: np.ndarray) -> np.ndarray:
 def expand_1_axis(w: np.ndarray,
                   epsilon: float,
                   axis: int) -> np.ndarray:
-  """Expands either the first dimension or the last dimension of w.
+  """Expands either the first or last dimension of w.
 
   If `axis = 0`, the following constraint will be satisfied:
     matmul(x, w) ==
@@ -54,9 +54,12 @@ def expand_1_axis(w: np.ndarray,
   Returns:
     Expanded numpy array.
   """
-  assert axis in (0, -1), (
-      "Only support expanding the first or the last dimension. "
-      "Got: {}".format(axis))
+
+  if axis not in (0, -1):
+    raise ValueError(
+        "Only support expanding the first or the last dimension. "
+        "Got: {}".format(axis)
+    )
 
   rank = len(w.shape)
 
@@ -76,7 +79,7 @@ def expand_1_axis(w: np.ndarray,
 
 def expand_2_axes(w: np.ndarray,
                   epsilon: float) -> np.ndarray:
-  """Expands the first dimension and the last dimension of w.
+  """Expands the first and last dimension of w.
 
   The following constraint will be satisfied:
     expand_vector(matmul(x, w)) ==

From 479fa5316bb782abf455b969db9f4ac1c2c08c33 Mon Sep 17 00:00:00 2001
From: redwrasse
Date: Thu, 30 Nov 2023 22:37:54 -0800
Subject: [PATCH 3/5] more wip refactor

---
 .../experimental/tf2_utils_2x_wide.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/official/modeling/fast_training/experimental/tf2_utils_2x_wide.py b/official/modeling/fast_training/experimental/tf2_utils_2x_wide.py
index 6142594bbf0..87e7ff51e22 100644
--- a/official/modeling/fast_training/experimental/tf2_utils_2x_wide.py
+++ b/official/modeling/fast_training/experimental/tf2_utils_2x_wide.py
@@ -16,7 +16,7 @@
 
 from absl import logging
 import numpy as np
-import tensorflow as tf, tf_keras
+import tensorflow as tf
 
 
 def expand_vector(v: np.ndarray) -> np.ndarray:
@@ -38,11 +38,11 @@ def expand_1_axis(w: np.ndarray,
                   axis: int) -> np.ndarray:
   """Expands either the first or last dimension of w.
 
-  If `axis = 0`, the following constraint will be satisfied:
+  If `axis = 0`, the following expression will be satisfied:
     matmul(x, w) ==
       matmul(expand_vector(x), expand_1_axis(w, epsilon=0.1, axis=0))
 
-  If `axis = -1`, the following constraint will be satisfied if `epsilon = 0.0`:
+  If `axis = -1` and `epsilon = 0.0`, the following constraint will be satisfied:
     expand_vector(matmul(x, w)) ==
       2 * matmul(x, expand_1_axis(w, epsilon=0.0, axis=-1))
 
@@ -68,7 +68,7 @@ def expand_1_axis(w: np.ndarray,
 
   sign_flip = np.array([1, -1])
   for _ in range(rank - 1):
-    sign_flip = np.expand_dims(sign_flip, axis=-1 if axis == 0 else 0)
+    sign_flip = np.expand_dims(sign_flip, axis=axis-1)
   sign_flip = np.tile(sign_flip,
                       [w.shape[0]] + [1] * (rank - 2) + [w.shape[-1]])
 
@@ -81,7 +81,7 @@ def expand_2_axes(w: np.ndarray,
                   epsilon: float) -> np.ndarray:
   """Expands the first and last dimension of w.
 
-  The following constraint will be satisfied:
+  This operation satisfies the following expression:
     expand_vector(matmul(x, w)) ==
       matmul(expand_vector(x), expand_2_axes(w))
 
@@ -182,8 +182,8 @@ def model_to_model_2x_wide(model_from: tf.Module,
     assert model_narrow([[1, 2, 3]]) == model_wide([[1, 1, 2, 2, 3, 3]])
   ```
 
-  We assume that `model_from` and `model_to` has the same architecture and only
-  widths of them differ.
+  We assume that `model_from` and `model_to` have the same architecture and differ
+  only in widths.
 
   Args:
     model_from: input model to expand.
From 9b8cc4e29f9546457adc55bf126280e26af904fb Mon Sep 17 00:00:00 2001
From: redwrasse
Date: Thu, 30 Nov 2023 23:04:22 -0800
Subject: [PATCH 4/5] add test_relations

---
 .../experimental/tf2_utils_2x_wide.py      |  2 +-
 .../experimental/tf2_utils_2x_wide_test.py | 16 ++++++++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/official/modeling/fast_training/experimental/tf2_utils_2x_wide.py b/official/modeling/fast_training/experimental/tf2_utils_2x_wide.py
index 87e7ff51e22..2da80bb74b4 100644
--- a/official/modeling/fast_training/experimental/tf2_utils_2x_wide.py
+++ b/official/modeling/fast_training/experimental/tf2_utils_2x_wide.py
@@ -40,7 +40,7 @@ def expand_1_axis(w: np.ndarray,
 
   If `axis = 0`, the following expression will be satisfied:
     matmul(x, w) ==
-      matmul(expand_vector(x), expand_1_axis(w, epsilon=0.1, axis=0))
+      matmul(expand_vector(x), expand_1_axis(w, axis=0))
 
   If `axis = -1` and `epsilon = 0.0`, the following constraint will be satisfied:
     expand_vector(matmul(x, w)) ==
diff --git a/official/modeling/fast_training/experimental/tf2_utils_2x_wide_test.py b/official/modeling/fast_training/experimental/tf2_utils_2x_wide_test.py
index 59ad118cc6b..9fcd980b14e 100644
--- a/official/modeling/fast_training/experimental/tf2_utils_2x_wide_test.py
+++ b/official/modeling/fast_training/experimental/tf2_utils_2x_wide_test.py
@@ -71,6 +71,22 @@ def test_expand_3d_tensor_axis_2(self):
     o1 = np.matmul(x, w1)
     self.assertAllClose(o0, np.sum(o1.reshape(2, 2), axis=-1))
 
+  def test_relations(self):
+    x = np.array([10, 11])
+    w = np.random.rand(2, 2)
+    # matmul(x, w) == matmul(expand_vector(x), expand_1_axis(w, axis=0))
+    lhs = np.matmul(x, w)
+    rhs = np.matmul(tf2_utils_2x_wide.expand_vector(x), tf2_utils_2x_wide.expand_1_axis(w, epsilon=0.1, axis=0))
+    self.assertAllClose(lhs, rhs)
+    # expand_vector(matmul(x, w)) == 2 * matmul(x, expand_1_axis(w, epsilon=0.0, axis=-1))
+    lhs = tf2_utils_2x_wide.expand_vector(np.matmul(x, w))
+    rhs = 2 * np.matmul(x, tf2_utils_2x_wide.expand_1_axis(w, epsilon=0.0, axis=-1))
+    self.assertAllClose(lhs, rhs)
+    # expand_vector(matmul(x, w)) == matmul(expand_vector(x), expand_2_axes(w))
+    lhs = tf2_utils_2x_wide.expand_vector(np.matmul(x, w))
+    rhs = np.matmul(tf2_utils_2x_wide.expand_vector(x), tf2_utils_2x_wide.expand_2_axes(w, epsilon=0.1))
+    self.assertAllClose(lhs, rhs)
+
   def test_end_to_end(self):
     """Covers expand_vector, expand_2_axes, and expand_1_axis."""
     model_narrow = tf_keras.Sequential()

From 610f1777235dfc5eff0a3031e3c761c0f218d522 Mon Sep 17 00:00:00 2001
From: redwrasse
Date: Tue, 23 Jan 2024 01:58:27 -0500
Subject: [PATCH 5/5] fix tf_keras import

---
 official/modeling/fast_training/experimental/tf2_utils_2x_wide.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/official/modeling/fast_training/experimental/tf2_utils_2x_wide.py b/official/modeling/fast_training/experimental/tf2_utils_2x_wide.py
index 2da80bb74b4..0b2f5b88d34 100644
--- a/official/modeling/fast_training/experimental/tf2_utils_2x_wide.py
+++ b/official/modeling/fast_training/experimental/tf2_utils_2x_wide.py
@@ -16,7 +16,7 @@
 
 from absl import logging
 import numpy as np
-import tensorflow as tf
+import tensorflow as tf, tf_keras
 
 
 def expand_vector(v: np.ndarray) -> np.ndarray:
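
Note on the relations exercised by test_relations: the width-doubling identity that expand_vector, expand_1_axis, and expand_2_axes are documented to satisfy can be sketched with plain NumPy, independent of the patched module. The helper below is a minimal illustration only (hypothetical name, epsilon noise omitted), not part of the patches.

import numpy as np

def expand_vector(v):
  # Duplicate every entry: [x0, x1] -> [x0, x0, x1, x1].
  return np.repeat(v, 2, axis=-1)

def widen_kernel_2_axes(w):
  # Repeat w along both axes and halve it, so the two duplicated input
  # entries together contribute exactly the original activation:
  #   expand_vector(x @ w) == expand_vector(x) @ widen_kernel_2_axes(w)
  # This mirrors the relation documented for expand_2_axes, without the
  # epsilon sign-flipped noise the module adds.
  return np.repeat(np.repeat(w, 2, axis=0), 2, axis=1) / 2.0

x = np.random.rand(3)
w = np.random.rand(3, 4)
np.testing.assert_allclose(expand_vector(x @ w),
                           expand_vector(x) @ widen_kernel_2_axes(w))

The epsilon term that expand_1_axis and expand_2_axes add on top of this construction, with alternating signs via sign_flip, presumably exists so that the duplicated units do not remain identical once training resumes.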