Skip to content

Commit

Permalink
Merge pull request #118 from timokau/lazy-data-dimension
Browse files Browse the repository at this point in the history
Determine data dimensions lazily on fit instead of on init
  • Loading branch information
timokau authored May 26, 2020
2 parents 1311bcb + 7c0eef0 commit 5bdc0e4
Show file tree
Hide file tree
Showing 45 changed files with 188 additions and 378 deletions.
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ method:

.. code-block:: python
fate = cs.FATEChoiceFunction(n_object_features=2)
fate = cs.FATEChoiceFunction()
fate.fit(X_train, Y_train)
Predictions can then be obtained using:
Expand Down
10 changes: 1 addition & 9 deletions csrank/choicefunction/cmpnet_choice.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
class CmpNetChoiceFunction(CmpNetCore, ChoiceFunctions):
def __init__(
self,
n_object_features,
n_hidden=2,
n_units=8,
loss_function="binary_crossentropy",
Expand Down Expand Up @@ -49,8 +48,6 @@ def __init__(
Parameters
----------
n_object_features : int
Number of features of the object space
n_hidden : int
Number of hidden layers used in the scoring network
n_units : int
Expand Down Expand Up @@ -80,7 +77,6 @@ def __init__(
"""
super().__init__(
n_object_features=n_object_features,
n_hidden=n_hidden,
n_units=n_units,
loss_function=loss_function,
Expand All @@ -95,11 +91,7 @@ def __init__(
**kwargs
)
self.logger = logging.getLogger(CmpNetChoiceFunction.__name__)
self.logger.info(
"Initializing network with object features {}".format(
self.n_object_features
)
)
self.logger.info("Initializing network")
self.threshold = 0.5

def _convert_instances_(self, X, Y):
Expand Down
4 changes: 0 additions & 4 deletions csrank/choicefunction/fate_choice.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
class FATEChoiceFunction(FATENetwork, ChoiceFunctions):
def __init__(
self,
n_object_features,
n_hidden_set_layers=2,
n_hidden_set_units=2,
n_hidden_joint_layers=32,
Expand Down Expand Up @@ -50,8 +49,6 @@ def __init__(
Parameters
----------
n_object_features : int
Dimensionality of the feature space of each object
n_hidden_set_layers : int
Number of set layers.
n_hidden_set_units : int
Expand Down Expand Up @@ -82,7 +79,6 @@ def __init__(
self.loss_function = loss_function
self.metrics = metrics
super().__init__(
n_object_features=n_object_features,
n_hidden_set_layers=n_hidden_set_layers,
n_hidden_set_units=n_hidden_set_units,
n_hidden_joint_layers=n_hidden_joint_layers,
Expand Down
8 changes: 0 additions & 8 deletions csrank/choicefunction/fatelinear_choice.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@
class FATELinearChoiceFunction(FATELinearCore, ChoiceFunctions):
def __init__(
self,
n_object_features,
n_objects,
n_hidden_set_units=2,
loss_function=binary_crossentropy,
learning_rate=1e-3,
Expand Down Expand Up @@ -41,10 +39,6 @@ def __init__(
Parameters
----------
n_object_features : int
Dimensionality of the feature space of each object
n_objects : int
Number of objects in each choice set
n_hidden_set_units : int
Number of hidden set units.
batch_size : int
Expand All @@ -57,8 +51,6 @@ def __init__(
Keyword arguments for the @FATENetwork
"""
super().__init__(
n_object_features=n_object_features,
n_objects=n_objects,
n_hidden_set_units=n_hidden_set_units,
learning_rate=learning_rate,
batch_size=batch_size,
Expand Down
20 changes: 7 additions & 13 deletions csrank/choicefunction/feta_choice.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,6 @@
class FETAChoiceFunction(FETANetwork, ChoiceFunctions):
def __init__(
self,
n_objects,
n_object_features,
n_hidden=2,
n_units=8,
add_zeroth_order_model=False,
Expand Down Expand Up @@ -61,10 +59,6 @@ def __init__(
Parameters
----------
n_objects : int
Number of objects in each query set
n_object_features : int
Dimensionality of the feature space of each object
n_hidden : int
Number of hidden layers
n_units : int
Expand Down Expand Up @@ -97,8 +91,6 @@ def __init__(
Keyword arguments for the hidden units
"""
super().__init__(
n_objects=n_objects,
n_object_features=n_object_features,
n_hidden=n_hidden,
n_units=n_units,
add_zeroth_order_model=add_zeroth_order_model,
Expand All @@ -119,7 +111,9 @@ def __init__(
self.logger = logging.getLogger(FETAChoiceFunction.__name__)

def _construct_layers(self, **kwargs):
self.input_layer = Input(shape=(self.n_objects, self.n_object_features))
self.input_layer = Input(
shape=(self.n_objects_fit_, self.n_object_features_fit_)
)
# Todo: Variable sized input
# X = Input(shape=(None, n_features))
if self.batch_normalization:
Expand Down Expand Up @@ -177,7 +171,7 @@ def create_input_lambda(i):
self.logger.debug("Create 0th order model")
zeroth_order_outputs = []
inputs = []
for i in range(self.n_objects):
for i in range(self.n_objects_fit_):
x = create_input_lambda(i)(self.input_layer)
inputs.append(x)
for hidden in self.hidden_layers_zeroth:
Expand All @@ -186,8 +180,8 @@ def create_input_lambda(i):
zeroth_order_scores = concatenate(zeroth_order_outputs)
self.logger.debug("0th order model finished")
self.logger.debug("Create 1st order model")
outputs = [list() for _ in range(self.n_objects)]
for i, j in combinations(range(self.n_objects), 2):
outputs = [list() for _ in range(self.n_objects_fit_)]
for i, j in combinations(range(self.n_objects_fit_), 2):
if self._use_zeroth_model:
x1 = inputs[i]
x2 = inputs[j]
Expand Down Expand Up @@ -296,7 +290,7 @@ def fit(
self.threshold = 0.5

def sub_sampling(self, X, Y):
if self._n_objects <= self.max_number_of_objects:
if self.n_objects_fit_ <= self.max_number_of_objects:
return X, Y
n_objects = self.max_number_of_objects
bucket_size = int(X.shape[1] / n_objects)
Expand Down
8 changes: 0 additions & 8 deletions csrank/choicefunction/fetalinear_choice.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@
class FETALinearChoiceFunction(FETALinearCore, ChoiceFunctions):
def __init__(
self,
n_object_features,
n_objects,
loss_function=binary_crossentropy,
learning_rate=5e-3,
batch_size=256,
Expand Down Expand Up @@ -40,10 +38,6 @@ def __init__(
Parameters
----------
n_object_features : int
Dimensionality of the feature space of each object
n_objects : int
Number of objects in each choice set
n_hidden_set_units : int
Number of hidden set units.
batch_size : int
Expand All @@ -56,8 +50,6 @@ def __init__(
Keyword arguments for the @FATENetwork
"""
super().__init__(
n_object_features=n_object_features,
n_objects=n_objects,
learning_rate=learning_rate,
batch_size=batch_size,
loss_function=loss_function,
Expand Down
14 changes: 5 additions & 9 deletions csrank/choicefunction/generalized_linear_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,7 @@


class GeneralizedLinearModel(ChoiceFunctions, Learner):
def __init__(
self, n_object_features, regularization="l2", random_state=None, **kwargs
):
def __init__(self, regularization="l2", random_state=None, **kwargs):
"""
Create an instance of the GeneralizedLinearModel model for learning the choice function. This model is
adapted from the multinomial logit model :class:`csrank.discretechoice.multinomial_logit_model.MultinomialLogitModel`.
Expand All @@ -52,8 +50,6 @@ def __init__(
Parameters
----------
n_object_features : int
Number of features of the object space
regularization : string, optional
Regularization technique to be used for estimating the weights
random_state : int or object
Expand All @@ -68,7 +64,6 @@ def __init__(
[2] Kenneth Train. Qualitative choice analysis. Cambridge, MA: MIT Press, 1986
"""
self.logger = logging.getLogger(GeneralizedLinearModel.__name__)
self.n_object_features = n_object_features
if regularization in ["l1", "l2"]:
self.regularization = regularization
else:
Expand Down Expand Up @@ -156,8 +151,8 @@ def construct_model(self, X, Y):
with pm.Model() as self.model:
self.Xt = theano.shared(X)
self.Yt = theano.shared(Y)
shapes = {"weights": self.n_object_features}
# shapes = {'weights': (self.n_object_features, 3)}
shapes = {"weights": self.n_object_features_fit_}
# shapes = {'weights': (self.n_object_features_fit_, 3)}
weights_dict = create_weight_dictionary(self.model_configuration, shapes)
intercept = pm.Normal("intercept", mu=0, sd=10)
utility = tt.dot(self.Xt, weights_dict["weights"]) + intercept
Expand Down Expand Up @@ -274,14 +269,15 @@ def _fit(
},
**kwargs
):
_n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape
self.construct_model(X, Y)
fit_pymc3_model(self, sampler, draws, tune, vi_params, **kwargs)

def _predict_scores_fixed(self, X, **kwargs):
d = dict(pm.summary(self.trace)["mean"])
intercept = 0.0
weights = np.array(
[d["weights[{}]".format(i)] for i in range(self.n_object_features)]
[d["weights[{}]".format(i)] for i in range(self.n_object_features_fit_)]
)
if "intercept" in d:
intercept = intercept + d["intercept"]
Expand Down
13 changes: 3 additions & 10 deletions csrank/choicefunction/pairwise_choice.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
class PairwiseSVMChoiceFunction(PairwiseSVM, ChoiceFunctions):
def __init__(
self,
n_object_features,
C=1.0,
tol=1e-4,
normalize=True,
Expand All @@ -30,8 +29,6 @@ def __init__(
Parameters
----------
n_object_features : int
Number of features of the object space
C : float, optional
Penalty parameter of the error term
tol : float, optional
Expand All @@ -54,7 +51,6 @@ def __init__(
"""
super().__init__(
n_object_features=n_object_features,
C=C,
tol=tol,
normalize=normalize,
Expand All @@ -63,11 +59,7 @@ def __init__(
**kwargs
)
self.logger = logging.getLogger(PairwiseSVMChoiceFunction.__name__)
self.logger.info(
"Initializing network with object features {}".format(
self.n_object_features
)
)
self.logger.info("Initializing network")
self.threshold = 0.5

def _convert_instances_(self, X, Y):
Expand All @@ -80,7 +72,7 @@ def _convert_instances_(self, X, Y):
y_single,
) = generate_complete_pairwise_dataset(X, Y)
del garbage
assert x_train.shape[1] == self.n_object_features
assert x_train.shape[1] == self.n_object_features_fit_
self.logger.debug(
"Finished the Dataset with instances {}".format(x_train.shape[0])
)
Expand All @@ -107,6 +99,7 @@ def fit(self, X, Y, tune_size=0.1, thin_thresholds=1, verbose=0, **kwd):
Keyword arguments for the fit function
"""
_n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape
if tune_size > 0:
X_train, X_val, Y_train, Y_val = train_test_split(
X, Y, test_size=tune_size, random_state=self.random_state
Expand Down
10 changes: 1 addition & 9 deletions csrank/choicefunction/ranknet_choice.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
class RankNetChoiceFunction(RankNetCore, ChoiceFunctions):
def __init__(
self,
n_object_features,
n_hidden=2,
n_units=8,
loss_function="binary_crossentropy",
Expand Down Expand Up @@ -40,8 +39,6 @@ def __init__(
Parameters
----------
n_object_features : int
Number of features of the object space
n_hidden : int
Number of hidden layers used in the scoring network
n_units : int
Expand Down Expand Up @@ -74,7 +71,6 @@ def __init__(
[2] Burges, C. J. (2010). "From ranknet to lambdarank to lambdamart: An overview.", Learning, 11(23-581).
"""
super().__init__(
n_object_features=n_object_features,
n_hidden=n_hidden,
n_units=n_units,
loss_function=loss_function,
Expand All @@ -89,11 +85,7 @@ def __init__(
**kwargs
)
self.logger = logging.getLogger(RankNetChoiceFunction.__name__)
self.logger.info(
"Initializing network with object features {}".format(
self.n_object_features
)
)
self.logger.info("Initializing network")
self.threshold = 0.5

def construct_model(self):
Expand Down
Loading

0 comments on commit 5bdc0e4

Please sign in to comment.