diff --git a/CHANGELOG.md b/CHANGELOG.md index 88154628..ed1fa8ba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - `from_config`, `get_config` and `get_params` methods to all models except neural-net-based([#170](https://github.com/MobileTeleSystems/RecTools/pull/170)) +- Optional `epochs` argument to `ImplicitALSWrapperModel.fit` method ([#203](https://github.com/MobileTeleSystems/RecTools/pull/203)) ## [0.8.0] - 28.08.2024 diff --git a/rectools/models/implicit_als.py b/rectools/models/implicit_als.py index cc279851..3538c40a 100644 --- a/rectools/models/implicit_als.py +++ b/rectools/models/implicit_als.py @@ -197,9 +197,12 @@ def _from_config(cls, config: ImplicitALSWrapperModelConfig) -> tpe.Self: model = model_cls(**config.model.params) return cls(model=model, verbose=config.verbose, fit_features_together=config.fit_features_together) - def _fit(self, dataset: Dataset) -> None: # type: ignore + # TODO: move to `epochs` argument of `partial_fit` method when implemented + def _fit(self, dataset: Dataset, epochs: tp.Optional[int] = None) -> None: # type: ignore self.model = deepcopy(self._model) ui_csr = dataset.get_user_item_matrix(include_weights=True).astype(np.float32) + if epochs is None: + epochs = self.model.iterations if self.fit_features_together: fit_als_with_features_together_inplace( @@ -207,6 +210,7 @@ def _fit(self, dataset: Dataset) -> None: # type: ignore ui_csr, dataset.get_hot_user_features(), dataset.get_hot_item_features(), + epochs, self.verbose, ) else: @@ -215,6 +219,7 @@ def _fit(self, dataset: Dataset) -> None: # type: ignore ui_csr, dataset.get_hot_user_features(), dataset.get_hot_item_features(), + epochs, self.verbose, ) @@ -282,6 +287,7 @@ def fit_als_with_features_separately_inplace( ui_csr: sparse.csr_matrix, user_features: tp.Optional[Features], item_features: tp.Optional[Features], + iterations: int, verbose: int = 0, ) -> None: """ @@ -300,7 +306,15 @@ def fit_als_with_features_separately_inplace( verbose : int Whether to print output. """ + # If model was fitted we should drop any learnt embeddings except actual latent factors + if model.user_factors is not None and model.item_factors is not None: + # Without .copy() gpu.Matrix will break correct slicing + user_factors = get_users_vectors(model)[:, : model.factors].copy() + item_factors = get_items_vectors(model)[:, : model.factors].copy() + _set_factors(model, user_factors, item_factors) + iu_csr = ui_csr.T.tocsr(copy=False) + model.iterations = iterations model.fit(ui_csr, show_progress=verbose > 0) user_factors_chunks = [get_users_vectors(model)] @@ -321,10 +335,13 @@ def fit_als_with_features_separately_inplace( user_factors = np.hstack(user_factors_chunks) item_factors = np.hstack(item_factors_chunks) + _set_factors(model, user_factors, item_factors) + + +def _set_factors(model: AnyAlternatingLeastSquares, user_factors: np.ndarray, item_factors: np.ndarray) -> None: if isinstance(model, GPUAlternatingLeastSquares): # pragma: no cover user_factors = implicit.gpu.Matrix(user_factors) item_factors = implicit.gpu.Matrix(item_factors) - model.user_factors = user_factors model.item_factors = item_factors @@ -363,36 +380,30 @@ def _fit_paired_factors( def _init_latent_factors_cpu( model: CPUAlternatingLeastSquares, n_users: int, n_items: int ) -> tp.Tuple[np.ndarray, np.ndarray]: - """Logic is copied and pasted from original implicit library code""" + """ + Logic is copied and pasted from original implicit library code. + This method is used only for model that hasn't been fitted yet. + """ random_state = check_random_state(model.random_state) - if model.user_factors is None: - user_latent_factors = random_state.random((n_users, model.factors)) * 0.01 - else: - user_latent_factors = model.user_factors - if model.item_factors is None: - item_latent_factors = random_state.random((n_items, model.factors)) * 0.01 - else: - item_latent_factors = model.item_factors + user_latent_factors = random_state.random((n_users, model.factors)) * 0.01 + item_latent_factors = random_state.random((n_items, model.factors)) * 0.01 return user_latent_factors, item_latent_factors def _init_latent_factors_gpu( model: GPUAlternatingLeastSquares, n_users: int, n_items: int ) -> tp.Tuple[np.ndarray, np.ndarray]: # pragma: no cover - """Logic is copied and pasted from original implicit library code""" + """ + Logic is copied and pasted from original implicit library code. + This method is used only for model that hasn't been fitted yet. + """ random_state = check_random_state(model.random_state) - if model.user_factors is None: - user_latent_factors = random_state.uniform( - low=-0.5 / model.factors, high=0.5 / model.factors, size=(n_users, model.factors) - ) - else: - user_latent_factors = model.user_factors.to_numpy() - if model.item_factors is None: - item_latent_factors = random_state.uniform( - low=-0.5 / model.factors, high=0.5 / model.factors, size=(n_items, model.factors) - ) - else: - item_latent_factors = model.item_factors.to_numpy() + user_latent_factors = random_state.uniform( + low=-0.5 / model.factors, high=0.5 / model.factors, size=(n_users, model.factors) + ) + item_latent_factors = random_state.uniform( + low=-0.5 / model.factors, high=0.5 / model.factors, size=(n_items, model.factors) + ) return user_latent_factors, item_latent_factors @@ -401,6 +412,7 @@ def fit_als_with_features_together_inplace( ui_csr: sparse.csr_matrix, user_features: tp.Optional[Features], item_features: tp.Optional[Features], + iterations: int, verbose: int = 0, ) -> None: """ @@ -421,6 +433,65 @@ def fit_als_with_features_together_inplace( """ n_users, n_items = ui_csr.shape + if model.user_factors is None or model.item_factors is None: + user_factors, item_factors, n_user_explicit_factors, n_item_explicit_factors = ( + _init_user_item_factors_for_combined_training_with_features( + model, n_users, n_items, user_features, item_features + ) + ) + else: + user_factors = get_users_vectors(model) + item_factors = get_items_vectors(model) + n_user_explicit_factors = user_features.values.shape[1] if user_features is not None else 0 + n_item_explicit_factors = item_features.values.shape[1] if item_features is not None else 0 + + # Fix number of factors + n_latent_factors = model.factors + model.factors = n_latent_factors + n_user_explicit_factors + n_item_explicit_factors + + # Give the positive examples more weight if asked for (implicit library logic copy) + ui_csr = model.alpha * ui_csr + + if isinstance(model, GPUAlternatingLeastSquares): # pragma: no cover + _fit_combined_factors_on_gpu_inplace( + model, + ui_csr, + user_factors, + item_factors, + n_user_explicit_factors, + n_item_explicit_factors, + verbose, + iterations, + ) + else: + _fit_combined_factors_on_cpu_inplace( + model, + ui_csr, + user_factors, + item_factors, + n_user_explicit_factors, + n_item_explicit_factors, + verbose, + iterations, + ) + + # Fix back model factors + model.factors = n_latent_factors + + +def _init_user_item_factors_for_combined_training_with_features( + model: AnyAlternatingLeastSquares, + n_users: int, + n_items: int, + user_features: tp.Optional[Features], + item_features: tp.Optional[Features], +) -> tp.Tuple[np.ndarray, np.ndarray, int, int]: + """ + Init user and item factors for model that hasn't been initialized yet. + Final factors will include latent factors, explicit factors from + user/item features and their paired item/user factors. + This method is only used when `fit_features_together` is set to ``True`` + """ # Prepare explicit factors user_explicit_factors: np.ndarray if user_features is None: @@ -442,10 +513,6 @@ def fit_als_with_features_together_inplace( else: user_latent_factors, item_latent_factors = _init_latent_factors_cpu(model, n_users, n_items) - # Fix number of factors - n_latent_factors = model.factors - model.factors = n_latent_factors + n_user_explicit_factors + n_item_explicit_factors - # Prepare paired factors user_factors_paired_to_items = np.zeros((n_users, n_item_explicit_factors)) item_factors_paired_to_users = np.zeros((n_items, n_user_explicit_factors)) @@ -466,29 +533,7 @@ def fit_als_with_features_together_inplace( ) ).astype(model.dtype) - # Give the positive examples more weight if asked for (implicit library logic copy) - ui_csr = model.alpha * ui_csr - - if isinstance(model, GPUAlternatingLeastSquares): # pragma: no cover - _fit_combined_factors_on_gpu_inplace( - model, - ui_csr, - user_factors, - item_factors, - n_user_explicit_factors, - n_item_explicit_factors, - verbose, - ) - else: - _fit_combined_factors_on_cpu_inplace( - model, - ui_csr, - user_factors, - item_factors, - n_user_explicit_factors, - n_item_explicit_factors, - verbose, - ) + return user_factors, item_factors, n_user_explicit_factors, n_item_explicit_factors def _fit_combined_factors_on_cpu_inplace( @@ -499,6 +544,7 @@ def _fit_combined_factors_on_cpu_inplace( n_user_explicit_factors: int, n_item_explicit_factors: int, verbose: int, + iterations: int, ) -> None: n_factors = user_factors.shape[1] user_explicit_factors = user_factors[:, :n_user_explicit_factors].copy() @@ -512,7 +558,7 @@ def _fit_combined_factors_on_cpu_inplace( solver = model.solver - for _ in tqdm(range(model.iterations), disable=verbose == 0): + for _ in tqdm(range(iterations), disable=verbose == 0): solver( ui_csr, @@ -544,6 +590,7 @@ def _fit_combined_factors_on_gpu_inplace( n_user_explicit_factors: int, n_item_explicit_factors: int, verbose: int, + iterations: int, ) -> None: # pragma: no cover n_factors = user_factors.shape[1] user_explicit_factors = user_factors[:, :n_user_explicit_factors].copy() @@ -563,7 +610,7 @@ def _fit_combined_factors_on_gpu_inplace( _YtY = implicit.gpu.Matrix.zeros(model.factors, model.factors) _XtX = implicit.gpu.Matrix.zeros(model.factors, model.factors) - for _ in tqdm(range(model.iterations), disable=verbose == 0): + for _ in tqdm(range(iterations), disable=verbose == 0): model.solver.calculate_yty(Y, _YtY, model.regularization) model.solver.least_squares(ui_csr_cuda, X, _YtY, Y, model.cg_steps) diff --git a/tests/models/test_implicit_als.py b/tests/models/test_implicit_als.py index 564ea4a6..0573c8a7 100644 --- a/tests/models/test_implicit_als.py +++ b/tests/models/test_implicit_als.py @@ -30,6 +30,8 @@ AnyAlternatingLeastSquares, CPUAlternatingLeastSquares, GPUAlternatingLeastSquares, + get_items_vectors, + get_users_vectors, ) from rectools.models.utils import recommend_from_scores @@ -64,6 +66,29 @@ def _init_model_factors_inplace(model: AnyAlternatingLeastSquares, dataset: Data def dataset(self) -> Dataset: return DATASET + @pytest.fixture + def dataset_w_features(self) -> Dataset: + user_id_map = IdMap.from_values(["u1", "u2", "u3"]) + item_id_map = IdMap.from_values(["i1", "i2", "i3"]) + interactions_df = pd.DataFrame( + [ + ["u1", "i1", 0.1, "2021-09-09"], + ["u2", "i1", 0.1, "2021-09-09"], + ["u2", "i2", 0.5, "2021-09-05"], + ["u2", "i3", 0.2, "2021-09-05"], + ["u1", "i3", 0.2, "2021-09-05"], + ["u3", "i1", 0.2, "2021-09-05"], + ], + columns=[Columns.User, Columns.Item, Columns.Weight, Columns.Datetime], + ) + interactions = Interactions.from_raw(interactions_df, user_id_map, item_id_map) + user_features_df = pd.DataFrame({"id": ["u1", "u2", "u3"], "f1": [0.3, 0.4, 0.5]}) + user_features = DenseFeatures.from_dataframe(user_features_df, user_id_map) + item_features_df = pd.DataFrame({"id": ["i1", "i1"], "feature": ["f1", "f2"], "value": [2.1, 100]}) + item_features = SparseFeatures.from_flatten(item_features_df, item_id_map) + dataset = Dataset(user_id_map, item_id_map, interactions, user_features, item_features) + return dataset + @pytest.mark.parametrize( "filter_viewed,expected", ( @@ -206,26 +231,10 @@ def test_with_whitelist( ), ), ) - def test_happy_path_with_features(self, fit_features_together: bool, expected: pd.DataFrame, use_gpu: bool) -> None: - user_id_map = IdMap.from_values(["u1", "u2", "u3"]) - item_id_map = IdMap.from_values(["i1", "i2", "i3"]) - interactions_df = pd.DataFrame( - [ - ["u1", "i1", 0.1, "2021-09-09"], - ["u2", "i1", 0.1, "2021-09-09"], - ["u2", "i2", 0.5, "2021-09-05"], - ["u2", "i3", 0.2, "2021-09-05"], - ["u1", "i3", 0.2, "2021-09-05"], - ["u3", "i1", 0.2, "2021-09-05"], - ], - columns=[Columns.User, Columns.Item, Columns.Weight, Columns.Datetime], - ) - interactions = Interactions.from_raw(interactions_df, user_id_map, item_id_map) - user_features_df = pd.DataFrame({"id": ["u1", "u2", "u3"], "f1": [0.3, 0.4, 0.5]}) - user_features = DenseFeatures.from_dataframe(user_features_df, user_id_map) - item_features_df = pd.DataFrame({"id": ["i1", "i1"], "feature": ["f1", "f2"], "value": [2.1, 100]}) - item_features = SparseFeatures.from_flatten(item_features_df, item_id_map) - dataset = Dataset(user_id_map, item_id_map, interactions, user_features, item_features) + def test_happy_path_with_features( + self, fit_features_together: bool, expected: pd.DataFrame, use_gpu: bool, dataset_w_features: Dataset + ) -> None: + dataset = dataset_w_features # In case of big number of iterations there are differences between CPU and GPU results base_model = AlternatingLeastSquares(factors=32, num_threads=2, use_gpu=use_gpu) @@ -355,6 +364,39 @@ def test_i2i_with_warm_and_cold_items(self, use_gpu: bool, dataset: Dataset) -> k=2, ) + # TODO: move this test to `partial_fit` method when implemented + @pytest.mark.parametrize("fit_features_together", (False, True)) + @pytest.mark.parametrize("use_features_in_dataset", (False, True)) + def test_per_epoch_fitting_consistent_with_regular_fitting( + self, + dataset: Dataset, + dataset_w_features: Dataset, + fit_features_together: bool, + use_features_in_dataset: bool, + use_gpu: bool, + ) -> None: + if use_features_in_dataset: + dataset = dataset_w_features + + iterations = 20 + + base_model_1 = AlternatingLeastSquares( + factors=2, num_threads=2, iterations=iterations, random_state=32, use_gpu=use_gpu + ) + model_1 = ImplicitALSWrapperModel(model=base_model_1, fit_features_together=fit_features_together) + model_1.fit(dataset) + + base_model_2 = AlternatingLeastSquares( + factors=2, num_threads=2, iterations=iterations, random_state=32, use_gpu=use_gpu + ) + model_2 = ImplicitALSWrapperModel(model=base_model_2, fit_features_together=fit_features_together) + for _ in range(iterations): + model_2.fit(dataset, epochs=1) + model_2._model = deepcopy(model_2.model) # pylint: disable=protected-access + + assert np.allclose(get_users_vectors(model_1.model), get_users_vectors(model_2.model)) + assert np.allclose(get_items_vectors(model_1.model), get_items_vectors(model_2.model)) + class CustomALS(CPUAlternatingLeastSquares): pass