Require uninitialized optimizers for our learners
An initialized optimizer is a tensorflow object, which (at least in
graph mode in tf1) is not deepcopy-able. Even if we were able to
deepcopy it, we probably wouldn't want to, since it contains state.
Scikit-learn needs to be able to deepcopy an estimator's arguments so
that it can create copies and derivatives of it.

Instead we require the uninitialized optimizer and its parameters to be
passed to our learners separately. The learner can then initialize the
optimizer as needed.
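As an illustration of the new convention (a hedged sketch, not part of
the commit: the learner class and parameter values are only examples,
and the `optimizer__{kwarg}` routing follows the skorch convention
mentioned in HISTORY.rst):

    from keras.optimizers import SGD
    from csrank import FETAChoiceFunction  # illustrative; any learner works the same way

    # Pass the class itself, not an instance like SGD(lr=0.01).
    learner = FETAChoiceFunction(
        optimizer=SGD,            # uninitialized optimizer class
        optimizer__lr=0.01,       # forwarded as SGD(lr=0.01, momentum=0.9)
        optimizer__momentum=0.9,  # when the learner initializes it
    )
    learner.fit(X_train, Y_train)  # X_train/Y_train assumed to exist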
timokau committed Jun 17, 2020
1 parent a1b36db commit 95a0ad2
Showing 26 changed files with 171 additions and 116 deletions.
4 changes: 4 additions & 0 deletions HISTORY.rst
@@ -9,6 +9,10 @@ Unreleased
   particular, the parameters nesterov, momentum and lr are now set to the
   default values set by keras.
 
+* All optimizers must now be passed in uninitialized. Optimizer parameters can
+  be set by passing `optimizer__{kwarg}` parameters to the learner. This
+  follows the scikit-learn and skorch standard.
+
 1.2.1 (2020-06-08)
 ------------------
 
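Since the routed parameters follow the scikit-learn double-underscore
convention, they can also be adjusted after construction; a minimal
sketch, assuming the learners implement the standard scikit-learn
estimator API as the entry above implies:

    # Change the learning rate of an existing learner (hypothetical values):
    learner.set_params(optimizer__lr=0.001)

    # Optimizer arguments become tunable like any other hyperparameter:
    from sklearn.model_selection import GridSearchCV

    search = GridSearchCV(learner, param_grid={"optimizer__lr": [0.1, 0.01, 0.001]})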
8 changes: 5 additions & 3 deletions csrank/choicefunction/cmpnet_choice.py
@@ -19,7 +19,7 @@ def __init__(
         kernel_regularizer=l2(1e-4),
         kernel_initializer="lecun_normal",
         activation="relu",
-        optimizer=SGD(),
+        optimizer=SGD,
         metrics=["binary_accuracy"],
         batch_size=256,
         random_state=None,
@@ -60,8 +60,10 @@ def __init__(
             Regularizer function applied to all the hidden weight matrices.
         activation : function or string
             Type of activation function to use in each hidden layer
-        optimizer : function or string
-            Optimizer to use during stochastic gradient descent
+        optimizer: Class
+            Uninitialized optimizer class following the keras optimizer interface.
+        optimizer__{kwarg}
+            Arguments to be passed to the optimizer on initialization, such as optimizer__lr.
         metrics : list
             List of metrics to evaluate during training (can be non-differentiable)
         batch_size : int
8 changes: 5 additions & 3 deletions csrank/choicefunction/fate_choice.py
@@ -21,7 +21,7 @@ def __init__(
         activation="selu",
         kernel_initializer="lecun_normal",
         kernel_regularizer=l2(0.01),
-        optimizer=SGD(),
+        optimizer=SGD,
         batch_size=256,
         metrics=None,
         random_state=None,
@@ -63,8 +63,10 @@ def __init__(
             Initialization function for the weights of each hidden layer
         kernel_regularizer : function or string
             Regularizer to use in the hidden units
-        optimizer : string or function
-            Stochastic gradient optimizer
+        optimizer: Class
+            Uninitialized optimizer class following the keras optimizer interface.
+        optimizer__{kwarg}
+            Arguments to be passed to the optimizer on initialization, such as optimizer__lr.
         batch_size : int
             Batch size to use for training
         loss_function : function
10 changes: 6 additions & 4 deletions csrank/choicefunction/feta_choice.py
@@ -34,7 +34,7 @@ def __init__(
         kernel_regularizer=l2(1e-4),
         kernel_initializer="lecun_normal",
         activation="selu",
-        optimizer=SGD(),
+        optimizer=SGD,
         metrics=["binary_accuracy"],
         batch_size=256,
         random_state=None,
@@ -79,8 +79,10 @@ def __init__(
             Initialization function for the weights of each hidden layer
         activation : string or function
             Activation function to use in the hidden units
-        optimizer : string or function
-            Stochastic gradient optimizer
+        optimizer: Class
+            Uninitialized optimizer class following the keras optimizer interface.
+        optimizer__{kwarg}
+            Arguments to be passed to the optimizer on initialization, such as optimizer__lr.
         metrics : list
             List of evaluation metrics (can be non-differentiable)
         batch_size : int
@@ -218,7 +220,7 @@ def create_input_lambda(i):
         model = Model(inputs=self.input_layer, outputs=scores)
         self.logger.debug("Compiling complete model...")
         model.compile(
-            loss=self.loss_function, optimizer=self.optimizer, metrics=self.metrics
+            loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics
         )
         return model
 
8 changes: 5 additions & 3 deletions csrank/choicefunction/ranknet_choice.py
@@ -19,7 +19,7 @@ def __init__(
         kernel_regularizer=l2(1e-4),
         kernel_initializer="lecun_normal",
         activation="relu",
-        optimizer=SGD(),
+        optimizer=SGD,
         metrics=["binary_accuracy"],
         batch_size=256,
         random_state=None,
@@ -53,8 +53,10 @@ def __init__(
             Initialization function for the weights of each hidden layer
         activation : function or string
             Type of activation function to use in each hidden layer
-        optimizer : function or string
-            Optimizer to use during stochastic gradient descent
+        optimizer: Class
+            Uninitialized optimizer class following the keras optimizer interface.
+        optimizer__{kwarg}
+            Arguments to be passed to the optimizer on initialization, such as optimizer__lr.
         metrics : list
             List of metrics to evaluate during training (can be non-differentiable)
         batch_size : int
15 changes: 7 additions & 8 deletions csrank/core/cmpnet_core.py
@@ -4,7 +4,6 @@
 from keras import backend as K
 from keras import Input
 from keras import Model
-from keras import optimizers
 from keras.layers import concatenate
 from keras.layers import Dense
 from keras.optimizers import SGD
@@ -29,7 +28,7 @@ def __init__(
         kernel_regularizer=l2(1e-4),
         kernel_initializer="lecun_normal",
         activation="relu",
-        optimizer=SGD(),
+        optimizer=SGD,
         metrics=["binary_accuracy"],
         batch_size=256,
         random_state=None,
@@ -47,8 +46,7 @@ def __init__(
         self.kernel_initializer = kernel_initializer
         self.loss_function = loss_function
 
-        self.optimizer = optimizers.get(optimizer)
-        self._optimizer_config = self.optimizer.get_config()
+        self.optimizer = optimizer
 
         self.n_hidden = n_hidden
         self.n_units = n_units
@@ -97,6 +95,7 @@ def construct_model(self):
         model: keras :class:`Model`
             Neural network to learn the CmpNet utility score
         """
+        self._initialize_optimizer()
         x1x2 = concatenate([self.x1, self.x2])
         x2x1 = concatenate([self.x2, self.x1])
         self.logger.debug("Creating the model")
@@ -110,7 +109,7 @@ def construct_model(self):
         merged_output = concatenate([N_g, N_l])
         model = Model(inputs=[self.x1, self.x2], outputs=merged_output)
         model.compile(
-            loss=self.loss_function, optimizer=self.optimizer, metrics=self.metrics
+            loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics
         )
         return model
 
@@ -212,7 +211,7 @@ def clear_memory(self, **kwargs):
         sess = tf.Session()
         K.set_session(sess)
 
-        self.optimizer = self.optimizer.from_config(self._optimizer_config)
+        self._initialize_optimizer()
         self._construct_layers(
             kernel_regularizer=self.kernel_regularizer,
             kernel_initializer=self.kernel_initializer,
@@ -255,8 +254,8 @@ def set_tunable_parameters(
         self.n_units = n_units
         self.kernel_regularizer = l2(reg_strength)
         self.batch_size = batch_size
-        self.optimizer = self.optimizer.from_config(self._optimizer_config)
-        K.set_value(self.optimizer.lr, learning_rate)
+        self._initialize_optimizer()
+        K.set_value(self.optimizer_.lr, learning_rate)
         self._construct_layers(
             kernel_regularizer=self.kernel_regularizer,
             kernel_initializer=self.kernel_initializer,
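The `_initialize_optimizer` helper called above is defined in a part of
the diff that is not shown here. A plausible minimal sketch of what it
has to do, based on the commit message and the skorch-style routing
(names and details are assumptions, not the actual implementation):

    def _initialize_optimizer(self):
        # Collect the skorch-style routed parameters, e.g. optimizer__lr=0.01,
        # and strip the "optimizer__" prefix to get constructor kwargs.
        routed = {
            name[len("optimizer__") :]: value
            for name, value in vars(self).items()
            if name.startswith("optimizer__")
        }
        # Instantiate the stored (uninitialized) optimizer class. The trailing
        # underscore marks a derived attribute, per scikit-learn convention.
        self.optimizer_ = self.optimizer(**routed)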
22 changes: 12 additions & 10 deletions csrank/core/fate_network.py
@@ -1,6 +1,5 @@
 import logging
 
-from keras import optimizers
 import keras.backend as K
 from keras.layers import Dense
 from keras.layers import Input
@@ -29,7 +28,7 @@ def __init__(
         activation="selu",
         kernel_initializer="lecun_normal",
         kernel_regularizer=l2(0.01),
-        optimizer=SGD(),
+        optimizer=SGD,
         batch_size=256,
         random_state=None,
         **kwargs,
@@ -50,8 +49,10 @@ def __init__(
             Initialization function for the weights of each hidden layer
         kernel_regularizer : function or string
             Regularizer to use in the hidden units
-        optimizer : string or function
-            Stochastic gradient optimizer
+        optimizer: Class
+            Uninitialized optimizer class following the keras optimizer interface.
+        optimizer__{kwarg}
+            Arguments to be passed to the optimizer on initialization, such as optimizer__lr.
         batch_size : int
             Batch size to use for training
         random_state : int or object
@@ -69,15 +70,15 @@ def __init__(
         self.kernel_initializer = kernel_initializer
         self.kernel_regularizer = kernel_regularizer
         self.batch_size = batch_size
-        self.optimizer = optimizers.get(optimizer)
-        self._optimizer_config = self.optimizer.get_config()
+        self.optimizer = optimizer
         self.joint_layers = None
         self.scorer = None
         keys = list(kwargs.keys())
         for key in keys:
             if key not in allowed_dense_kwargs:
                 del kwargs[key]
         self.kwargs = kwargs
+        self._initialize_optimizer()
         self._construct_layers(
             activation=self.activation,
             kernel_initializer=self.kernel_initializer,
@@ -167,8 +168,8 @@ def set_tunable_parameters(
         self.kernel_regularizer = l2(reg_strength)
         self.batch_size = batch_size
         # Hack to fix memory leak:
-        self.optimizer = self.optimizer.from_config(self._optimizer_config)
-        K.set_value(self.optimizer.lr, learning_rate)
+        self._initialize_optimizer()
+        K.set_value(self.optimizer_.lr, learning_rate)
 
         self._construct_layers(
             activation=self.activation,
@@ -474,7 +475,7 @@ def construct_model(self, n_features, n_objects):
         model = Model(inputs=input_layer, outputs=scores)
 
         model.compile(
-            loss=self.loss_function, optimizer=self.optimizer, metrics=self.metrics
+            loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics
         )
         return model
 
@@ -536,6 +537,7 @@ def fit(
         """
         self.random_state_ = check_random_state(self.random_state)
         _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape
+        self._initialize_optimizer()
         self._fit(
             X=X,
             Y=Y,
@@ -703,7 +705,7 @@ def clear_memory(self, n_objects=5, **kwargs):
         K.clear_session()
         sess = tf.Session()
         K.set_session(sess)
-        self.optimizer = self.optimizer.from_config(self._optimizer_config)
+        self._initialize_optimizer()
         self._construct_layers(
             activation=self.activation,
             kernel_initializer=self.kernel_initializer,
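The deepcopy problem from the commit message can be made concrete with
a small sketch (hypothetical; `learner` is any of our estimators):

    import copy
    from keras.optimizers import SGD
    from sklearn.base import clone

    # A class object is copied atomically: deepcopy just returns it.
    assert copy.deepcopy(SGD) is SGD

    # An *instance* would drag tensorflow graph state along, which is
    # exactly what fails in tf1 graph mode. Because get_params() now
    # returns the plain class, sklearn can clone the learner:
    # fresh_learner = clone(learner)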
15 changes: 7 additions & 8 deletions csrank/core/feta_network.py
@@ -5,7 +5,6 @@
 from keras import backend as K
 from keras import Input
 from keras import Model
-from keras import optimizers
 from keras.layers import add
 from keras.layers import concatenate
 from keras.layers import Dense
@@ -36,7 +35,7 @@ def __init__(
         kernel_regularizer=l2(1e-4),
         kernel_initializer="lecun_normal",
         activation="selu",
-        optimizer=SGD(),
+        optimizer=SGD,
         metrics=None,
         batch_size=256,
         random_state=None,
@@ -54,8 +53,7 @@ def __init__(
         self.num_subsample = num_subsample
         self.batch_size = batch_size
         self.hash_file = None
-        self.optimizer = optimizers.get(optimizer)
-        self._optimizer_config = self.optimizer.get_config()
+        self.optimizer = optimizer
         self._use_zeroth_model = add_zeroth_order_model
         self.n_hidden = n_hidden
         self.n_units = n_units
@@ -251,7 +249,7 @@ def create_input_lambda(i):
         model = Model(inputs=self.input_layer, outputs=scores)
         self.logger.debug("Compiling complete model...")
         model.compile(
-            loss=self.loss_function, optimizer=self.optimizer, metrics=self.metrics
+            loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics
         )
         return model
 
@@ -282,6 +280,7 @@ def fit(
             Keyword arguments for the fit function
         """
         _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape
+        self._initialize_optimizer()
         self._construct_layers(
             kernel_regularizer=self.kernel_regularizer,
             kernel_initializer=self.kernel_initializer,
@@ -369,8 +368,8 @@ def set_tunable_parameters(
         self.n_units = n_units
         self.kernel_regularizer = l2(reg_strength)
         self.batch_size = batch_size
-        self.optimizer = self.optimizer.from_config(self._optimizer_config)
-        K.set_value(self.optimizer.lr, learning_rate)
+        self._initialize_optimizer()
+        K.set_value(self.optimizer_.lr, learning_rate)
         self._pairwise_model = None
         self._zero_order_model = None
         self._construct_layers(
@@ -402,7 +401,7 @@ def clear_memory(self, **kwargs):
 
         self._pairwise_model = None
         self._zero_order_model = None
-        self.optimizer = self.optimizer.from_config(self._optimizer_config)
+        self._initialize_optimizer()
         self._construct_layers(
             kernel_regularizer=self.kernel_regularizer,
             kernel_initializer=self.kernel_initializer,
15 changes: 7 additions & 8 deletions csrank/core/ranknet_core.py
@@ -3,7 +3,6 @@
 from keras import backend as K
 from keras import Input
 from keras import Model
-from keras import optimizers
 from keras.layers import add
 from keras.layers import Dense
 from keras.layers import Lambda
@@ -28,7 +27,7 @@ def __init__(
         kernel_regularizer=l2(1e-4),
         kernel_initializer="lecun_normal",
         activation="relu",
-        optimizer=SGD(),
+        optimizer=SGD,
         metrics=["binary_accuracy"],
         batch_size=256,
         random_state=None,
@@ -41,8 +40,7 @@ def __init__(
         self.kernel_regularizer = kernel_regularizer
         self.kernel_initializer = kernel_initializer
         self.loss_function = loss_function
-        self.optimizer = optimizers.get(optimizer)
-        self._optimizer_config = self.optimizer.get_config()
+        self.optimizer = optimizer
         self.n_hidden = n_hidden
         self.n_units = n_units
         keys = list(kwargs.keys())
@@ -101,7 +99,7 @@ def construct_model(self):
         output = self.output_node(merged_inputs)
         model = Model(inputs=[self.x1, self.x2], outputs=output)
         model.compile(
-            loss=self.loss_function, optimizer=self.optimizer, metrics=self.metrics
+            loss=self.loss_function, optimizer=self.optimizer_, metrics=self.metrics
         )
         return model
 
@@ -147,6 +145,7 @@ def fit(
         self.logger.debug("Instances created {}".format(X1.shape[0]))
         self.logger.debug("Creating the model")
 
+        self._initialize_optimizer()
         self._construct_layers(
             kernel_regularizer=self.kernel_regularizer,
             kernel_initializer=self.kernel_initializer,
@@ -217,7 +216,7 @@ def clear_memory(self, **kwargs):
         K.set_session(sess)
 
         self._scoring_model = None
-        self.optimizer = self.optimizer.from_config(self._optimizer_config)
+        self._initialize_optimizer()
         self._construct_layers(
             kernel_regularizer=self.kernel_regularizer,
             kernel_initializer=self.kernel_initializer,
@@ -260,8 +259,8 @@ def set_tunable_parameters(
         self.n_units = n_units
         self.kernel_regularizer = l2(reg_strength)
         self.batch_size = batch_size
-        self.optimizer = self.optimizer.from_config(self._optimizer_config)
-        K.set_value(self.optimizer.lr, learning_rate)
+        self._initialize_optimizer()
+        K.set_value(self.optimizer_.lr, learning_rate)
         self._scoring_model = None
         self._construct_layers(
             kernel_regularizer=self.kernel_regularizer,