Merge pull request #118 from timokau/lazy-data-dimension

Determine data dimensions lazily on fit instead of on init
kiudee · May 26, 2020 · 5bdc0e4 · 5bdc0e4
2 parents 1311bcb + 7c0eef0
commit 5bdc0e4
Show file tree

Hide file tree

Showing 45 changed files with 188 additions and 378 deletions.
diff --git a/README.rst b/README.rst
@@ -50,7 +50,7 @@ method:
 
 .. code-block:: python
 
-   fate = cs.FATEChoiceFunction(n_object_features=2)
+   fate = cs.FATEChoiceFunction()
    fate.fit(X_train, Y_train)
 
 Predictions can then be obtained using:

diff --git a/csrank/choicefunction/cmpnet_choice.py b/csrank/choicefunction/cmpnet_choice.py
@@ -12,7 +12,6 @@
 class CmpNetChoiceFunction(CmpNetCore, ChoiceFunctions):
     def __init__(
         self,
-        n_object_features,
         n_hidden=2,
         n_units=8,
         loss_function="binary_crossentropy",
@@ -49,8 +48,6 @@ def __init__(
 
             Parameters
             ----------
-            n_object_features : int
-                Number of features of the object space
             n_hidden : int
                 Number of hidden layers used in the scoring network
             n_units : int
@@ -80,7 +77,6 @@ def __init__(
 
         """
         super().__init__(
-            n_object_features=n_object_features,
             n_hidden=n_hidden,
             n_units=n_units,
             loss_function=loss_function,
@@ -95,11 +91,7 @@ def __init__(
             **kwargs
         )
         self.logger = logging.getLogger(CmpNetChoiceFunction.__name__)
-        self.logger.info(
-            "Initializing network with object features {}".format(
-                self.n_object_features
-            )
-        )
+        self.logger.info("Initializing network")
         self.threshold = 0.5
 
     def _convert_instances_(self, X, Y):

diff --git a/csrank/choicefunction/fate_choice.py b/csrank/choicefunction/fate_choice.py
@@ -13,7 +13,6 @@
 class FATEChoiceFunction(FATENetwork, ChoiceFunctions):
     def __init__(
         self,
-        n_object_features,
         n_hidden_set_layers=2,
         n_hidden_set_units=2,
         n_hidden_joint_layers=32,
@@ -50,8 +49,6 @@ def __init__(
 
             Parameters
             ----------
-            n_object_features : int
-                Dimensionality of the feature space of each object
             n_hidden_set_layers : int
                 Number of set layers.
             n_hidden_set_units : int
@@ -82,7 +79,6 @@ def __init__(
         self.loss_function = loss_function
         self.metrics = metrics
         super().__init__(
-            n_object_features=n_object_features,
             n_hidden_set_layers=n_hidden_set_layers,
             n_hidden_set_units=n_hidden_set_units,
             n_hidden_joint_layers=n_hidden_joint_layers,

diff --git a/csrank/choicefunction/fatelinear_choice.py b/csrank/choicefunction/fatelinear_choice.py
@@ -10,8 +10,6 @@
 class FATELinearChoiceFunction(FATELinearCore, ChoiceFunctions):
     def __init__(
         self,
-        n_object_features,
-        n_objects,
         n_hidden_set_units=2,
         loss_function=binary_crossentropy,
         learning_rate=1e-3,
@@ -41,10 +39,6 @@ def __init__(
 
             Parameters
             ----------
-            n_object_features : int
-                Dimensionality of the feature space of each object
-            n_objects : int
-                Number of objects in each choice set
             n_hidden_set_units : int
                 Number of hidden set units.
             batch_size : int
@@ -57,8 +51,6 @@ def __init__(
                 Keyword arguments for the @FATENetwork
         """
         super().__init__(
-            n_object_features=n_object_features,
-            n_objects=n_objects,
             n_hidden_set_units=n_hidden_set_units,
             learning_rate=learning_rate,
             batch_size=batch_size,

diff --git a/csrank/choicefunction/feta_choice.py b/csrank/choicefunction/feta_choice.py
@@ -24,8 +24,6 @@
 class FETAChoiceFunction(FETANetwork, ChoiceFunctions):
     def __init__(
         self,
-        n_objects,
-        n_object_features,
         n_hidden=2,
         n_units=8,
         add_zeroth_order_model=False,
@@ -61,10 +59,6 @@ def __init__(
 
             Parameters
             ----------
-            n_objects : int
-                Number of objects in each query set
-            n_object_features : int
-                Dimensionality of the feature space of each object
             n_hidden : int
                 Number of hidden layers
             n_units : int
@@ -97,8 +91,6 @@ def __init__(
                 Keyword arguments for the hidden units
         """
         super().__init__(
-            n_objects=n_objects,
-            n_object_features=n_object_features,
             n_hidden=n_hidden,
             n_units=n_units,
             add_zeroth_order_model=add_zeroth_order_model,
@@ -119,7 +111,9 @@ def __init__(
         self.logger = logging.getLogger(FETAChoiceFunction.__name__)
 
     def _construct_layers(self, **kwargs):
-        self.input_layer = Input(shape=(self.n_objects, self.n_object_features))
+        self.input_layer = Input(
+            shape=(self.n_objects_fit_, self.n_object_features_fit_)
+        )
         # Todo: Variable sized input
         # X = Input(shape=(None, n_features))
         if self.batch_normalization:
@@ -177,7 +171,7 @@ def create_input_lambda(i):
             self.logger.debug("Create 0th order model")
             zeroth_order_outputs = []
             inputs = []
-            for i in range(self.n_objects):
+            for i in range(self.n_objects_fit_):
                 x = create_input_lambda(i)(self.input_layer)
                 inputs.append(x)
                 for hidden in self.hidden_layers_zeroth:
@@ -186,8 +180,8 @@ def create_input_lambda(i):
             zeroth_order_scores = concatenate(zeroth_order_outputs)
             self.logger.debug("0th order model finished")
         self.logger.debug("Create 1st order model")
-        outputs = [list() for _ in range(self.n_objects)]
-        for i, j in combinations(range(self.n_objects), 2):
+        outputs = [list() for _ in range(self.n_objects_fit_)]
+        for i, j in combinations(range(self.n_objects_fit_), 2):
             if self._use_zeroth_model:
                 x1 = inputs[i]
                 x2 = inputs[j]
@@ -296,7 +290,7 @@ def fit(
             self.threshold = 0.5
 
     def sub_sampling(self, X, Y):
-        if self._n_objects <= self.max_number_of_objects:
+        if self.n_objects_fit_ <= self.max_number_of_objects:
             return X, Y
         n_objects = self.max_number_of_objects
         bucket_size = int(X.shape[1] / n_objects)

diff --git a/csrank/choicefunction/fetalinear_choice.py b/csrank/choicefunction/fetalinear_choice.py
@@ -10,8 +10,6 @@
 class FETALinearChoiceFunction(FETALinearCore, ChoiceFunctions):
     def __init__(
         self,
-        n_object_features,
-        n_objects,
         loss_function=binary_crossentropy,
         learning_rate=5e-3,
         batch_size=256,
@@ -40,10 +38,6 @@ def __init__(
 
             Parameters
             ----------
-            n_object_features : int
-                Dimensionality of the feature space of each object
-            n_objects : int
-                Number of objects in each choice set
             n_hidden_set_units : int
                 Number of hidden set units.
             batch_size : int
@@ -56,8 +50,6 @@ def __init__(
                 Keyword arguments for the @FATENetwork
         """
         super().__init__(
-            n_object_features=n_object_features,
-            n_objects=n_objects,
             learning_rate=learning_rate,
             batch_size=batch_size,
             loss_function=loss_function,

diff --git a/csrank/choicefunction/generalized_linear_model.py b/csrank/choicefunction/generalized_linear_model.py
@@ -30,9 +30,7 @@
 
 
 class GeneralizedLinearModel(ChoiceFunctions, Learner):
-    def __init__(
-        self, n_object_features, regularization="l2", random_state=None, **kwargs
-    ):
+    def __init__(self, regularization="l2", random_state=None, **kwargs):
         """
             Create an instance of the GeneralizedLinearModel model for learning the choice function. This model is
             adapted from the multinomial logit model :class:`csrank.discretechoice.multinomial_logit_model.MultinomialLogitModel`.
@@ -52,8 +50,6 @@ def __init__(
 
             Parameters
             ----------
-            n_object_features : int
-                Number of features of the object space
             regularization : string, optional
                 Regularization technique to be used for estimating the weights
             random_state : int or object
@@ -68,7 +64,6 @@ def __init__(
                 [2] Kenneth Train. Qualitative choice analysis. Cambridge, MA: MIT Press, 1986
         """
         self.logger = logging.getLogger(GeneralizedLinearModel.__name__)
-        self.n_object_features = n_object_features
         if regularization in ["l1", "l2"]:
             self.regularization = regularization
         else:
@@ -156,8 +151,8 @@ def construct_model(self, X, Y):
         with pm.Model() as self.model:
             self.Xt = theano.shared(X)
             self.Yt = theano.shared(Y)
-            shapes = {"weights": self.n_object_features}
-            # shapes = {'weights': (self.n_object_features, 3)}
+            shapes = {"weights": self.n_object_features_fit_}
+            # shapes = {'weights': (self.n_object_features_fit_, 3)}
             weights_dict = create_weight_dictionary(self.model_configuration, shapes)
             intercept = pm.Normal("intercept", mu=0, sd=10)
             utility = tt.dot(self.Xt, weights_dict["weights"]) + intercept
@@ -274,14 +269,15 @@ def _fit(
         },
         **kwargs
     ):
+        _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape
         self.construct_model(X, Y)
         fit_pymc3_model(self, sampler, draws, tune, vi_params, **kwargs)
 
     def _predict_scores_fixed(self, X, **kwargs):
         d = dict(pm.summary(self.trace)["mean"])
         intercept = 0.0
         weights = np.array(
-            [d["weights[{}]".format(i)] for i in range(self.n_object_features)]
+            [d["weights[{}]".format(i)] for i in range(self.n_object_features_fit_)]
         )
         if "intercept" in d:
             intercept = intercept + d["intercept"]

diff --git a/csrank/choicefunction/pairwise_choice.py b/csrank/choicefunction/pairwise_choice.py
@@ -10,7 +10,6 @@
 class PairwiseSVMChoiceFunction(PairwiseSVM, ChoiceFunctions):
     def __init__(
         self,
-        n_object_features,
         C=1.0,
         tol=1e-4,
         normalize=True,
@@ -30,8 +29,6 @@ def __init__(
 
             Parameters
             ----------
-            n_object_features : int
-                Number of features of the object space
             C : float, optional
                 Penalty parameter of the error term
             tol : float, optional
@@ -54,7 +51,6 @@ def __init__(
 
         """
         super().__init__(
-            n_object_features=n_object_features,
             C=C,
             tol=tol,
             normalize=normalize,
@@ -63,11 +59,7 @@ def __init__(
             **kwargs
         )
         self.logger = logging.getLogger(PairwiseSVMChoiceFunction.__name__)
-        self.logger.info(
-            "Initializing network with object features {}".format(
-                self.n_object_features
-            )
-        )
+        self.logger.info("Initializing network")
         self.threshold = 0.5
 
     def _convert_instances_(self, X, Y):
@@ -80,7 +72,7 @@ def _convert_instances_(self, X, Y):
             y_single,
         ) = generate_complete_pairwise_dataset(X, Y)
         del garbage
-        assert x_train.shape[1] == self.n_object_features
+        assert x_train.shape[1] == self.n_object_features_fit_
         self.logger.debug(
             "Finished the Dataset with instances {}".format(x_train.shape[0])
         )
@@ -107,6 +99,7 @@ def fit(self, X, Y, tune_size=0.1, thin_thresholds=1, verbose=0, **kwd):
                 Keyword arguments for the fit function
 
         """
+        _n_instances, self.n_objects_fit_, self.n_object_features_fit_ = X.shape
         if tune_size > 0:
             X_train, X_val, Y_train, Y_val = train_test_split(
                 X, Y, test_size=tune_size, random_state=self.random_state

diff --git a/csrank/choicefunction/ranknet_choice.py b/csrank/choicefunction/ranknet_choice.py
@@ -12,7 +12,6 @@
 class RankNetChoiceFunction(RankNetCore, ChoiceFunctions):
     def __init__(
         self,
-        n_object_features,
         n_hidden=2,
         n_units=8,
         loss_function="binary_crossentropy",
@@ -40,8 +39,6 @@ def __init__(
 
             Parameters
             ----------
-            n_object_features : int
-                Number of features of the object space
             n_hidden : int
                 Number of hidden layers used in the scoring network
             n_units : int
@@ -74,7 +71,6 @@ def __init__(
                 [2] Burges, C. J. (2010). "From ranknet to lambdarank to lambdamart: An overview.", Learning, 11(23-581).
         """
         super().__init__(
-            n_object_features=n_object_features,
             n_hidden=n_hidden,
             n_units=n_units,
             loss_function=loss_function,
@@ -89,11 +85,7 @@ def __init__(
             **kwargs
         )
         self.logger = logging.getLogger(RankNetChoiceFunction.__name__)
-        self.logger.info(
-            "Initializing network with object features {}".format(
-                self.n_object_features
-            )
-        )
+        self.logger.info("Initializing network")
         self.threshold = 0.5
 
     def construct_model(self):