From b30888342e454a58ea000edc8b985a0b7ac5451b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?V=C3=ADt=20Novotn=C3=BD?= <witiko@mail.muni.cz>
Date: Fri, 7 Aug 2020 15:01:58 +0200
Subject: [PATCH] Reduce memory use of the term similarity matrix constructor,
 deprecate the positive_definite parameter, and extend normalization
 capabilities of the inner_product method (#2783)

* Deprecate SparseTermSimilarityMatrix's positive_definite parameter

* Reference paper on efficient implementation of soft cosine similarity

* Add example with Annoy indexer to SparseTermSimilarityMatrix

* Add example of obtaining word embeddings from SparseTermSimilarityMatrix

* Reduce space complexity of SparseTermSimilarityMatrix construction
Build matrix using arrays and bitfields rather than DOK sparse format

This work is based on the following blog post by @maciejkula:
https://maciejkula.github.io/2015/02/22/incremental-construction-of-sparse-matrices/

* Fix a typo in the soft cosine similarity Jupyter notebook

* Add human-readable string representation for TermSimilarityIndex

* Avoid sparse term similarity matrix computation when nonzero_limit <= 0

* Extend normalization in the inner_product method

Support the `maintain` vector normalization scheme.
Support separate vector normalization schemes for queries and documents.

* Remove a note in the docstring of SparseTermSimilarityMatrix

* Rerun continuous integration tests

* Use ==/!= to compare constant literals

* Add human-readable string representation for TermSimilarityIndex (cont.)

* Prod flake8 with a coding style violation in a docstring

* Collapse two lambdas into one internal function

* Revert "Prod flake8 with a coding style violation in a docstring"

This reverts commit 6557b849732b314570ea9d5132f1731d964e2fe6.

* Avoid str.format()

* Slice SparseTermSimilarityMatrix.inner_product tests by input types

* Remove similarity_type_code local variable

* Remove starting underscore from local function name

* Save indentation level and define populate_buffers function

* Extract SparseTermSimilarityMatrix constructor body to _create_source

* Extract NON_NEGATIVE_NORM_ASSERTION_MESSAGE to a module-level constant

* Extract cell assignment logic to cell_full local function

* Split variable swapping into three separate statements

* Extract normalization from the body of SparseTermSimilarityMatrix.inner_product

* Wrap overlong line

* Add test_inner_product_zerovector_zerovector and test_inner_product_zerovector_vector tests

* Further split test_inner_product into 63 test cases

* Raise ValueError when dictionary is empty
---
 docs/notebooks/soft_cosine_tutorial.ipynb |   4 +-
 gensim/similarities/docsim.py             |   2 +-
 gensim/similarities/termsim.py            | 527 +++++++++++++-------
 gensim/test/test_similarities.py          | 562 ++++++++++++++++++++--
 4 files changed, 862 insertions(+), 233 deletions(-)

diff --git a/docs/notebooks/soft_cosine_tutorial.ipynb b/docs/notebooks/soft_cosine_tutorial.ipynb
index 358c80ed02..aadbecf6a5 100644
--- a/docs/notebooks/soft_cosine_tutorial.ipynb
+++ b/docs/notebooks/soft_cosine_tutorial.ipynb
@@ -225,7 +225,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Number of documents: 3\n",
+      "Number of documents: 2274338\n",
       "CPU times: user 2min 1s, sys: 1.9 s, total: 2min 3s\n",
       "Wall time: 2min 56s\n"
      ]
@@ -259,7 +259,7 @@
     "        [preprocess(relcomment[\"RelCText\"]) for relcomment in thread[\"RelComments\"]])\n",
     "    for thread in api.load(\"semeval-2016-2017-task3-subtaskA-unannotated\")]))\n",
     "\n",
-    "print(\"Number of documents: %d\" % len(documents))"
+    "print(\"Number of documents: %d\" % len(corpus))"
    ]
   },
   {
diff --git a/gensim/similarities/docsim.py b/gensim/similarities/docsim.py
index daba706eb1..b014952499 100755
--- a/gensim/similarities/docsim.py
+++ b/gensim/similarities/docsim.py
@@ -978,7 +978,7 @@ def get_similarities(self, query):
         is_corpus, query = utils.is_corpus(query)
         if not is_corpus and isinstance(query, numpy.ndarray):
             query = [self.corpus[i] for i in query]  # convert document indexes to actual documents
-        result = self.similarity_matrix.inner_product(query, self.corpus, normalized=True)
+        result = self.similarity_matrix.inner_product(query, self.corpus, normalized=(True, True))
 
         if scipy.sparse.issparse(result):
             return numpy.asarray(result.todense())
diff --git a/gensim/similarities/termsim.py b/gensim/similarities/termsim.py
index c0e61e1490..3dcd4c6ae6 100644
--- a/gensim/similarities/termsim.py
+++ b/gensim/similarities/termsim.py
@@ -8,11 +8,14 @@
 This module provides classes that deal with term similarities.
 """
 
+from array import array
 from itertools import chain
 import logging
 from math import sqrt
+import warnings
 
 import numpy as np
+from six.moves import range
 from scipy import sparse
 
 from gensim.matutils import corpus2csc
@@ -20,6 +23,10 @@
 
 logger = logging.getLogger(__name__)
 
+NON_NEGATIVE_NORM_ASSERTION_MESSAGE = u"sparse documents must not contain any explicit " \
+    u"zero entries and the similarity matrix S must satisfy x^T * S * x >= 0 for any " \
+    u"nonzero bag-of-words vector x."
+
 
 class TermSimilarityIndex(SaveLoad):
     """
@@ -52,6 +59,10 @@ def most_similar(self, term, topn=10):
         """
         raise NotImplementedError
 
+    def __str__(self):  # human-readable form: ClassName(attr1=value1, attr2=value2, ...)
+        members = ', '.join('%s=%s' % pair for pair in vars(self).items())
+        return '%s(%s)' % (self.__class__.__name__, members)
+
 
 class UniformTermSimilarityIndex(TermSimilarityIndex):
     """
@@ -86,33 +97,6 @@ def most_similar(self, t1, topn=10):
             yield (t2, self.term_similarity)
 
 
-def _shortest_uint_dtype(max_value):
-    """Get the shortest unsingned integer data-type required for representing values up to a given
-    maximum value.
-
-    Returns the shortest unsingned integer data-type required for representing values up to a given
-    maximum value.
-
-    Parameters
-    ----------
-    max_value : int
-        The maximum value we wish to represent.
-
-    Returns
-    -------
-    data-type
-        The shortest unsigned integer data-type required for representing values up to a given
-        maximum value.
-    """
-    if max_value < 2**8:
-        return np.uint8
-    elif max_value < 2**16:
-        return np.uint16
-    elif max_value < 2**32:
-        return np.uint32
-    return np.uint64
-
-
 class WordEmbeddingSimilarityIndex(TermSimilarityIndex):
     """
     Use objects of this class to:
@@ -156,32 +140,288 @@ def most_similar(self, t1, topn=10):
                     yield (t2, similarity**self.exponent)
 
 
+def _shortest_uint_dtype(max_value):
+    """Get the shortest unsigned integer data-type required for representing values up to a given
+    maximum value.
+
+    The candidates are tried from the narrowest (numpy.uint8) up to numpy.uint32, and
+    numpy.uint64 is returned for any max_value of 2**32 or more.
+
+    Parameters
+    ----------
+    max_value : int
+        The maximum value we wish to represent.
+
+    Returns
+    -------
+    data-type
+        The shortest unsigned integer data-type required for representing values up to a given
+        maximum value.
+    """
+    if max_value < 2**8:
+        return np.uint8
+    elif max_value < 2**16:
+        return np.uint16
+    elif max_value < 2**32:
+        return np.uint32
+    return np.uint64
+
+
+def _create_source(index, dictionary, tfidf, symmetric, dominant, nonzero_limit, dtype):
+    """Build a sparse term similarity matrix using a term similarity index.
+
+    Returns
+    -------
+    matrix : :class:`scipy.sparse.coo_matrix`
+        The sparse term similarity matrix.
+
+    """
+    assert isinstance(index, TermSimilarityIndex)
+    assert dictionary is not None
+    matrix_order = len(dictionary)
+
+    if matrix_order == 0:
+        raise ValueError('Dictionary provided to SparseTermSimilarityMatrix must not be empty')
+
+    logger.info("constructing a sparse term similarity matrix using %s", index)
+
+    if nonzero_limit is None:
+        nonzero_limit = matrix_order
+
+    def tfidf_sort_key(term_index):
+        if isinstance(term_index, tuple):
+            term_index, *_ = term_index  # rows are sorted as (term_index, similarity) pairs
+        term_idf = tfidf.idfs[term_index]
+        return (-term_idf, term_index)  # decreasing idf, ties broken by increasing term index
+
+    if tfidf is None:
+        logger.info("iterating over columns in dictionary order")
+        columns = sorted(dictionary.keys())
+    else:
+        assert max(tfidf.idfs) == matrix_order - 1
+        logger.info("iterating over columns in tf-idf order")
+        columns = sorted(tfidf.idfs.keys(), key=tfidf_sort_key)
+
+    nonzero_counter_dtype = _shortest_uint_dtype(nonzero_limit)
+
+    column_nonzero = np.array([0] * matrix_order, dtype=nonzero_counter_dtype)
+    if dominant:
+        column_sum = np.zeros(matrix_order, dtype=dtype)
+    if symmetric:
+        assigned_cells = set()
+    row_buffer = array('Q')  # unsigned long long, read back as np.uint64 once all cells are collected
+    column_buffer = array('Q')
+    if dtype is np.float16 or dtype is np.float32:
+        data_buffer = array('f')  # array has no float16 typecode; values are cast to dtype at the end
+    elif dtype is np.float64:
+        data_buffer = array('d')
+    else:
+        raise ValueError('Dtype %s is unsupported, use numpy.float16, float32, or float64.' % dtype)
+
+    def cell_full(t1_index, t2_index, similarity):
+        if dominant and column_sum[t1_index] + abs(similarity) >= 1.0:
+            return True  # after adding the similarity, the matrix would cease to be strongly diagonally dominant
+        assert column_nonzero[t1_index] <= nonzero_limit
+        if column_nonzero[t1_index] == nonzero_limit:
+            return True  # after adding the similarity, the column would contain more than nonzero_limit elements
+        if symmetric and (t1_index, t2_index) in assigned_cells:
+            return True  # a similarity has already been assigned to this cell
+        return False
+
+    def populate_buffers(t1_index, t2_index, similarity):
+        column_buffer.append(t1_index)
+        row_buffer.append(t2_index)
+        data_buffer.append(similarity)
+        column_nonzero[t1_index] += 1
+        if symmetric:
+            assigned_cells.add((t1_index, t2_index))
+        if dominant:
+            column_sum[t1_index] += abs(similarity)
+
+    try:
+        from tqdm import tqdm as progress_bar
+    except ImportError:
+        def progress_bar(iterable):
+            return iterable
+
+    for column_number, t1_index in enumerate(progress_bar(columns)):
+        column_buffer.append(column_number)  # one cell of the main diagonal per iteration, always 1.0
+        row_buffer.append(column_number)
+        data_buffer.append(1.0)
+
+        if nonzero_limit <= 0:
+            continue  # only the diagonal is requested, skip the similarity lookups altogether
+
+        t1 = dictionary[t1_index]
+        num_nonzero = column_nonzero[t1_index]
+        num_rows = nonzero_limit - num_nonzero
+        most_similar = [
+            (dictionary.token2id[term], similarity)
+            for term, similarity in index.most_similar(t1, topn=num_rows)
+            if term in dictionary.token2id
+        ] if num_rows > 0 else []
+
+        if tfidf is None:
+            rows = sorted(most_similar)
+        else:
+            rows = sorted(most_similar, key=tfidf_sort_key)
+
+        for t2_index, similarity in rows:
+            if cell_full(t1_index, t2_index, similarity):
+                continue
+            if not symmetric:
+                populate_buffers(t1_index, t2_index, similarity)
+            elif not cell_full(t2_index, t1_index, similarity):
+                populate_buffers(t1_index, t2_index, similarity)
+                populate_buffers(t2_index, t1_index, similarity)
+
+    data_buffer = np.frombuffer(data_buffer, dtype=data_buffer.typecode).astype(dtype, copy=False)
+    row_buffer = np.frombuffer(row_buffer, dtype=np.uint64)
+    column_buffer = np.frombuffer(column_buffer, dtype=np.uint64)
+    matrix = sparse.coo_matrix((data_buffer, (row_buffer, column_buffer)), shape=(matrix_order, matrix_order))
+
+    logger.info(
+        "constructed a sparse term similarity matrix with %0.06f%% density",
+        100.0 * matrix.getnnz() / matrix_order**2,
+    )
+
+    return matrix
+
+
+def _normalize_dense_vector(vector, matrix, normalization):
+    """Normalize a dense vector after a change of basis.
+
+    Parameters
+    ----------
+    vector : 1xN ndarray
+        A dense vector.
+    matrix : NxN ndarray
+        A change-of-basis matrix.
+    normalization : {True, False, 'maintain'}
+        Whether the vector will be L2-normalized (True; corresponds to the soft
+        cosine measure), maintain its L2-norm during the change of basis
+        ('maintain'; corresponds to query expansion with partial membership),
+        or kept as-is (False; corresponds to query expansion).
+
+    Returns
+    -------
+    vector : ndarray
+        The normalized dense vector.
+
+    """
+    if not normalization:
+        return vector
+
+    vector_norm = vector.T.dot(matrix).dot(vector)[0, 0]  # x^T * S * x, the squared norm in the new basis
+    assert vector_norm >= 0.0, NON_NEGATIVE_NORM_ASSERTION_MESSAGE
+    if normalization == 'maintain' and vector_norm > 0.0:
+        vector_norm /= vector.T.dot(vector)  # relative to x^T * x, the squared L2 norm before the change
+    vector_norm = sqrt(vector_norm)
+
+    normalized_vector = vector
+    if vector_norm > 0.0:  # a zero vector is returned unchanged rather than divided by zero
+        normalized_vector /= vector_norm  # NOTE(review): in place; for an ndarray the caller's array changes too
+
+    return normalized_vector
+
+
+def _normalize_dense_corpus(corpus, matrix, normalization):
+    """Normalize a dense corpus after a change of basis.
+
+    Parameters
+    ----------
+    corpus : NxM ndarray
+        A dense corpus of M documents, one document per column.
+    matrix : NxN ndarray
+        A change-of-basis matrix.
+    normalization : {True, False, 'maintain'}
+        Whether the documents will be L2-normalized (True; corresponds to the soft
+        cosine measure), maintain their L2-norm during the change of basis
+        ('maintain'; corresponds to query expansion with partial membership),
+        or kept as-is (False; corresponds to query expansion).
+
+    Returns
+    -------
+    normalized_corpus : ndarray
+        The normalized dense corpus.
+
+    """
+    if not normalization:
+        return corpus
+
+    # use the following equality: np.diag(A.T.dot(B).dot(A)) == A.T.dot(B).multiply(A.T).sum(axis=1).T
+    corpus_norm = np.multiply(corpus.T.dot(matrix), corpus.T).sum(axis=1).T
+    assert corpus_norm.min() >= 0.0, NON_NEGATIVE_NORM_ASSERTION_MESSAGE
+    if normalization == 'maintain':
+        corpus_norm /= np.multiply(corpus.T, corpus.T).sum(axis=1).T
+    corpus_norm = np.sqrt(corpus_norm)
+
+    normalized_corpus = np.multiply(corpus, 1.0 / corpus_norm)
+    normalized_corpus[~np.isfinite(normalized_corpus)] = 0  # division by a zero norm gives nan / inf, zero both
+    return normalized_corpus
+
+
+def _normalize_sparse_corpus(corpus, matrix, normalization):
+    """Normalize a sparse corpus after a change of basis.
+
+    Parameters
+    ----------
+    corpus : NxM :class:`scipy.sparse.csc_matrix`
+        A sparse corpus of M documents, one document per column.
+    matrix : NxN :class:`scipy.sparse.csc_matrix`
+        A change-of-basis matrix.
+    normalization : {True, False, 'maintain'}
+        Whether the documents will be L2-normalized (True; corresponds to the soft
+        cosine measure), maintain their L2-norm during the change of basis
+        ('maintain'; corresponds to query expansion with partial membership),
+        or kept as-is (False; corresponds to query expansion).
+
+    Returns
+    -------
+    normalized_corpus : :class:`scipy.sparse.csc_matrix`
+        The normalized sparse corpus.
+
+    """
+    if not normalization:
+        return corpus
+
+    # use the following equality: np.diag(A.T.dot(B).dot(A)) == A.T.dot(B).multiply(A.T).sum(axis=1).T
+    corpus_norm = corpus.T.dot(matrix).multiply(corpus.T).sum(axis=1).T  # one squared norm per document
+    assert corpus_norm.min() >= 0.0, NON_NEGATIVE_NORM_ASSERTION_MESSAGE
+    if normalization == 'maintain':
+        corpus_norm /= corpus.T.multiply(corpus.T).sum(axis=1).T  # relative to the squared L2 norms
+    corpus_norm = np.sqrt(corpus_norm)
+
+    normalized_corpus = corpus.multiply(sparse.csr_matrix(1.0 / corpus_norm))
+    normalized_corpus[normalized_corpus == np.inf] = 0  # account for division by zero
+    return normalized_corpus
+
+
 class SparseTermSimilarityMatrix(SaveLoad):
     """
     Builds a sparse term similarity matrix using a term similarity index.
 
-    Notes
-    -----
-    Building a DOK matrix, and converting it to a CSC matrix carries a significant memory overhead.
-    Future work should switch to building arrays of rows, columns, and non-zero elements and
-    directly passing these arrays to the CSC matrix constructor without copying.
-
     Examples
     --------
     >>> from gensim.test.utils import common_texts
     >>> from gensim.corpora import Dictionary
     >>> from gensim.models import Word2Vec, WordEmbeddingSimilarityIndex
     >>> from gensim.similarities import SoftCosineSimilarity, SparseTermSimilarityMatrix
+    >>> from gensim.similarities.index import AnnoyIndexer
+    >>> from scikits.sparse.cholmod import cholesky
     >>>
     >>> model = Word2Vec(common_texts, size=20, min_count=1)  # train word-vectors
-    >>> termsim_index = WordEmbeddingSimilarityIndex(model.wv)
+    >>> annoy = AnnoyIndexer(model, num_trees=2)  # use annoy for faster word similarity lookups
+    >>> termsim_index = WordEmbeddingSimilarityIndex(model.wv, kwargs={'indexer': annoy})
     >>> dictionary = Dictionary(common_texts)
     >>> bow_corpus = [dictionary.doc2bow(document) for document in common_texts]
-    >>> similarity_matrix = SparseTermSimilarityMatrix(termsim_index, dictionary)  # construct similarity matrix
+    >>> similarity_matrix = SparseTermSimilarityMatrix(termsim_index, dictionary, symmetric=True, dominant=True)
     >>> docsim_index = SoftCosineSimilarity(bow_corpus, similarity_matrix, num_best=10)
     >>>
     >>> query = 'graph trees computer'.split()  # make a query
     >>> sims = docsim_index[dictionary.doc2bow(query)]  # calculate similarity of query to each doc from bow_corpus
+    >>>
+    >>> word_embeddings = cholesky(similarity_matrix.matrix).L()  # obtain word embeddings from similarity matrix
 
     Check out `Tutorial Notebook
     <https://github.com/RaRe-Technologies/gensim/blob/develop/docs/notebooks/soft_cosine_tutorial.ipynb>`_
@@ -192,125 +432,64 @@ class SparseTermSimilarityMatrix(SaveLoad):
     source : :class:`~gensim.similarities.termsim.TermSimilarityIndex` or :class:`scipy.sparse.spmatrix`
         The source of the term similarity. Either a term similarity index that will be used for
         building the term similarity matrix, or an existing sparse term similarity matrix that will
-        be encapsulated and stored in the matrix attribute.
+        be encapsulated and stored in the matrix attribute. When a matrix is specified as the
+        source, any other parameters will be ignored.
     dictionary : :class:`~gensim.corpora.dictionary.Dictionary` or None, optional
         A dictionary that specifies a mapping between terms and the indices of rows and columns
-        of the resulting term similarity matrix. The dictionary may only be `None` when `source` is
+        of the resulting term similarity matrix. The dictionary may only be None when source is
         a :class:`scipy.sparse.spmatrix`.
     tfidf : :class:`gensim.models.tfidfmodel.TfidfModel` or None, optional
         A model that specifies the relative importance of the terms in the dictionary. The columns
         of the term similarity matrix will be build in a decreasing order of importance of
         terms, or in the order of term identifiers if None.
     symmetric : bool, optional
-        Whether the symmetry of the term similarity matrix will be enforced. This parameter only has
-        an effect when `source` is a :class:`scipy.sparse.spmatrix`. Positive definiteness is a
-        necessary precondition if you later wish to derive a change-of-basis matrix from the term
-        similarity matrix using Cholesky factorization.
-    positive_definite: bool, optional
-        Whether the positive definiteness of the term similarity matrix will be enforced through
-        strict column diagonal dominance. Positive definiteness is a necessary precondition if you
-        later wish to derive a change-of-basis matrix from the term similarity matrix using Cholesky
-        factorization.
+        Whether the symmetry of the term similarity matrix will be enforced. Symmetry is a necessary
+        precondition for positive definiteness, which is necessary if you later wish to derive a
+        unique change-of-basis matrix from the term similarity matrix using Cholesky factorization.
+        Setting symmetric to False will significantly reduce memory usage during matrix construction.
+    dominant: bool, optional
+        Whether the strict column diagonal dominance of the term similarity matrix will be enforced.
+        Strict diagonal dominance and symmetry are sufficient preconditions for positive
+        definiteness, which is necessary if you later wish to derive a change-of-basis matrix from
+        the term similarity matrix using Cholesky factorization.
     nonzero_limit : int or None, optional
         The maximum number of non-zero elements outside the diagonal in a single column of the
         sparse term similarity matrix. If None, then no limit will be imposed.
     dtype : numpy.dtype, optional
-        Data-type of the sparse term similarity matrix.
+        The data type of the sparse term similarity matrix.
+    positive_definite: bool or None, optional
+        A deprecated alias for dominant.
 
     Attributes
     ----------
     matrix : :class:`scipy.sparse.csc_matrix`
         The encapsulated sparse term similarity matrix.
-    """
-    PROGRESS_MESSAGE_PERIOD = 1000  # how many columns are processed between progress messages
 
-    def __init__(self, source, dictionary=None, tfidf=None, symmetric=True, positive_definite=False, nonzero_limit=100,
-                 dtype=np.float32):
-        if sparse.issparse(source):
-            self.matrix = source.tocsc()  # encapsulate the passed sparse matrix
-            return
+    Raises
+    ------
+    ValueError
+        If `dictionary` is empty.
+
+    """
+    def __init__(self, source, dictionary=None, tfidf=None, symmetric=True, dominant=False,
+            nonzero_limit=100, dtype=np.float32, positive_definite=None):
 
-        index = source
-        assert isinstance(index, TermSimilarityIndex)
-        assert dictionary is not None
-        matrix_order = len(dictionary)
+        if positive_definite is not None:
+            warnings.warn(
+                'Parameter positive_definite will be removed in 4.0.0, use dominant instead',
+                category=DeprecationWarning,
+            )
+            dominant = positive_definite
 
-        logger.info("constructing a sparse term similarity matrix using %s", index)
+        if not sparse.issparse(source):
+            index = source
+            args = (index, dictionary, tfidf, symmetric, dominant, nonzero_limit, dtype)
+            source = _create_source(*args)
+            assert sparse.issparse(source)
 
-        if nonzero_limit is None:
-            nonzero_limit = matrix_order
+        self.matrix = source.tocsc()
 
-        if tfidf is None:
-            logger.info("iterating over columns in dictionary order")
-            columns = sorted(dictionary.keys())
-        else:
-            assert max(tfidf.idfs) == matrix_order - 1
-            logger.info("iterating over columns in tf-idf order")
-            columns = [
-                term_index for term_index, _
-                in sorted(
-                    tfidf.idfs.items(),
-                    key=lambda x: (lambda term_index, term_idf: (term_idf, -term_index))(*x), reverse=True)]
-
-        column_nonzero = np.array([0] * matrix_order, dtype=_shortest_uint_dtype(nonzero_limit))
-        column_sum = np.zeros(matrix_order, dtype=dtype)
-        matrix = sparse.identity(matrix_order, dtype=dtype, format="dok")
-
-        for column_number, t1_index in enumerate(columns):
-            if column_number % self.PROGRESS_MESSAGE_PERIOD == 0:
-                logger.info(
-                    "PROGRESS: at %.02f%% columns (%d / %d, %.06f%% density, "
-                    "%.06f%% projected density)",
-                    100.0 * (column_number + 1) / matrix_order, column_number + 1, matrix_order,
-                    100.0 * matrix.getnnz() / matrix_order**2,
-                    100.0 * np.clip(
-                        (1.0 * (matrix.getnnz() - matrix_order) / matrix_order**2)
-                        * (1.0 * matrix_order / (column_number + 1))
-                        + (1.0 / matrix_order),  # add density correspoding to the main diagonal
-                        0.0, 1.0))
-
-            t1 = dictionary[t1_index]
-            num_nonzero = column_nonzero[t1_index]
-            num_rows = nonzero_limit - num_nonzero
-            most_similar = [
-                (dictionary.token2id[term], similarity)
-                for term, similarity in index.most_similar(t1, topn=num_rows)
-                if term in dictionary.token2id
-            ] if num_rows > 0 else []
-
-            if tfidf is None:
-                rows = sorted(most_similar)
-            else:
-                rows = sorted(
-                    most_similar,
-                    key=lambda x: (lambda term_index, _: (tfidf.idfs[term_index], -term_index))(*x), reverse=True)
-
-            for row_number, (t2_index, similarity) in zip(range(num_rows), rows):
-                if positive_definite and column_sum[t1_index] + abs(similarity) >= 1.0:
-                    break
-                if symmetric:
-                    if column_nonzero[t2_index] < nonzero_limit \
-                            and (not positive_definite or column_sum[t2_index] + abs(similarity) < 1.0) \
-                            and not (t1_index, t2_index) in matrix:
-                        matrix[t1_index, t2_index] = similarity
-                        column_nonzero[t1_index] += 1
-                        column_sum[t1_index] += abs(similarity)
-                        matrix[t2_index, t1_index] = similarity
-                        column_nonzero[t2_index] += 1
-                        column_sum[t2_index] += abs(similarity)
-                else:
-                    matrix[t1_index, t2_index] = similarity
-                    column_sum[t1_index] += abs(similarity)
-
-        logger.info(
-            "constructed a sparse term similarity matrix with %0.06f%% density",
-            100.0 * matrix.getnnz() / matrix_order**2)
-
-        matrix = matrix.T
-        assert sparse.issparse(matrix)
-        self.__init__(matrix)
-
-    def inner_product(self, X, Y, normalized=False):
+    def inner_product(self, X, Y, normalized=(False, False)):
         """Get the inner product(s) between real vectors / corpora X and Y.
 
         Return the inner product(s) between real vectors / corpora vec1 and vec2 expressed in a
@@ -323,10 +502,11 @@ def inner_product(self, X, Y, normalized=False):
             A query vector / corpus in the sparse bag-of-words format.
         vec2 : list of (int, float) or iterable of list of (int, float)
             A document vector / corpus in the sparse bag-of-words format.
-        normalized : bool, optional
-            Whether the inner product should be L2-normalized. The normalized inner product
-            corresponds to the Soft Cosine Measure (SCM). SCM is a number between <-1.0, 1.0>,
-            where higher is more similar.
+        normalized : tuple of {True, False, 'maintain'}, optional
+            First/second value specifies whether the query/document vectors in the inner product
+            will be L2-normalized (True; corresponds to the soft cosine measure), maintain their
+            L2-norm during change of basis ('maintain'; corresponds to query expansion with partial
+            membership), or kept as-is (False; corresponds to query expansion; default).
 
         Returns
         -------
@@ -336,14 +516,35 @@ def inner_product(self, X, Y, normalized=False):
         References
         ----------
         The soft cosine measure was perhaps first described by [sidorovetal14]_.
+        Further notes on the efficient implementation of the soft cosine measure are described by
+        [novotny18]_.
 
         .. [sidorovetal14] Grigori Sidorov et al., "Soft Similarity and Soft Cosine Measure: Similarity
            of Features in Vector Space Model", 2014, http://www.cys.cic.ipn.mx/ojs/index.php/CyS/article/view/2043/1921.
 
+        .. [novotny18] Vít Novotný, "Implementation Notes for the Soft Cosine Measure", 2018,
+           http://dx.doi.org/10.1145/3269206.3269317.
+
         """
         if not X or not Y:
             return self.matrix.dtype.type(0.0)
 
+        if normalized in (True, False):
+            warnings.warn(
+                'Boolean parameter normalized will be removed in 4.0.0, use '
+                'normalized=(%s, %s) instead of normalized=%s' % tuple([normalized] * 3),
+                category=DeprecationWarning,
+            )
+            normalized = (normalized, normalized)
+
+        normalized_X, normalized_Y = normalized
+        valid_normalized_values = (True, False, 'maintain')
+
+        if normalized_X not in valid_normalized_values:
+            raise ValueError('{} is not a valid value of normalize'.format(normalized_X))
+        if normalized_Y not in valid_normalized_values:
+            raise ValueError('{} is not a valid value of normalize'.format(normalized_Y))
+
         is_corpus_X, X = is_corpus(X)
         is_corpus_Y, Y = is_corpus(Y)
 
@@ -356,24 +557,19 @@ def inner_product(self, X, Y, normalized=False):
             Y = np.array([Y[i] if i in Y else 0 for i in word_indices], dtype=dtype)
             matrix = self.matrix[word_indices[:, None], word_indices].todense()
 
+            X = _normalize_dense_vector(X, matrix, normalized_X)
+            Y = _normalize_dense_vector(Y, matrix, normalized_Y)
             result = X.T.dot(matrix).dot(Y)
 
-            if normalized:
-                X_norm = X.T.dot(matrix).dot(X)[0, 0]
-                Y_norm = Y.T.dot(matrix).dot(Y)[0, 0]
-
-                assert \
-                    X_norm > 0.0 and Y_norm > 0.0, \
-                    u"sparse documents must not contain any explicit zero entries and the similarity matrix S " \
-                    u"must satisfy x^T * S * x > 0 for any nonzero bag-of-words vector x."
-
-                result /= sqrt(X_norm) * sqrt(Y_norm)
+            if normalized_X is True and normalized_Y is True:
                 result = np.clip(result, -1.0, 1.0)
 
             return result[0, 0]
         elif not is_corpus_X or not is_corpus_Y:
             if is_corpus_X and not is_corpus_Y:
-                is_corpus_X, X, is_corpus_Y, Y = is_corpus_Y, Y, is_corpus_X, X  # make Y the corpus
+                X, Y = Y, X  # make Y the corpus
+                is_corpus_X, is_corpus_Y = is_corpus_Y, is_corpus_X
+                normalized_X, normalized_Y = normalized_Y, normalized_X
                 transposed = True
             else:
                 transposed = False
@@ -387,23 +583,12 @@ def inner_product(self, X, Y, normalized=False):
             X = np.array([X[i] if i in X else 0 for i in word_indices], dtype=dtype)
             Y = corpus2csc(Y, num_terms=self.matrix.shape[0], dtype=dtype)[word_indices, :].todense()
             matrix = self.matrix[word_indices[:, None], word_indices].todense()
-            if normalized:
-                # use the following equality: np.diag(A.T.dot(B).dot(A)) == A.T.dot(B).multiply(A.T).sum(axis=1).T
-                X_norm = np.multiply(X.T.dot(matrix), X.T).sum(axis=1).T
-                Y_norm = np.multiply(Y.T.dot(matrix), Y.T).sum(axis=1).T
-
-                assert \
-                    X_norm.min() > 0.0 and Y_norm.min() >= 0.0, \
-                    u"sparse documents must not contain any explicit zero entries and the similarity matrix S " \
-                    u"must satisfy x^T * S * x > 0 for any nonzero bag-of-words vector x."
-
-                X = np.multiply(X, 1 / np.sqrt(X_norm)).T
-                Y = np.multiply(Y, 1 / np.sqrt(Y_norm))
-                Y = np.nan_to_num(Y)  # Account for division by zero when Y_norm.min() == 0.0
 
-            result = X.T.dot(matrix).dot(Y)
+            X = _normalize_dense_vector(X, matrix, normalized_X)
+            Y = _normalize_dense_corpus(Y, matrix, normalized_Y)
+            result = X.dot(matrix).dot(Y)
 
-            if normalized:
+            if normalized_X is True and normalized_Y is True:
                 result = np.clip(result, -1.0, 1.0)
 
             if transposed:
@@ -416,23 +601,11 @@ def inner_product(self, X, Y, normalized=False):
             Y = corpus2csc(Y if is_corpus_Y else [Y], num_terms=self.matrix.shape[0], dtype=dtype)
             matrix = self.matrix
 
-            if normalized:
-                # use the following equality: np.diag(A.T.dot(B).dot(A)) == A.T.dot(B).multiply(A.T).sum(axis=1).T
-                X_norm = X.T.dot(matrix).multiply(X.T).sum(axis=1).T
-                Y_norm = Y.T.dot(matrix).multiply(Y.T).sum(axis=1).T
-
-                assert \
-                    X_norm.min() > 0.0 and Y_norm.min() >= 0.0, \
-                    u"sparse documents must not contain any explicit zero entries and the similarity matrix S " \
-                    u"must satisfy x^T * S * x > 0 for any nonzero bag-of-words vector x."
-
-                X = X.multiply(sparse.csr_matrix(1 / np.sqrt(X_norm)))
-                Y = Y.multiply(sparse.csr_matrix(1 / np.sqrt(Y_norm)))
-                Y[Y == np.inf] = 0  # Account for division by zero when Y_norm.min() == 0.0
-
+            X = _normalize_sparse_corpus(X, matrix, normalized_X)
+            Y = _normalize_sparse_corpus(Y, matrix, normalized_Y)
             result = X.T.dot(matrix).dot(Y)
 
-            if normalized:
+            if normalized_X is True and normalized_Y is True:
                 result.data = np.clip(result.data, -1.0, 1.0)
 
             return result
diff --git a/gensim/test/test_similarities.py b/gensim/test/test_similarities.py
index fbd8f53ade..6a898f1a67 100644
--- a/gensim/test/test_similarities.py
+++ b/gensim/test/test_similarities.py
@@ -876,7 +876,16 @@ def setUp(self):
             [u"government", u"denied", u"holiday", u"slowing", u"hollingworth"]]
         self.dictionary = Dictionary(self.documents)
         self.tfidf = TfidfModel(dictionary=self.dictionary)
+        zero_index = UniformTermSimilarityIndex(self.dictionary, term_similarity=0.0)
         self.index = UniformTermSimilarityIndex(self.dictionary, term_similarity=0.5)
+        self.identity_matrix = SparseTermSimilarityMatrix(zero_index, self.dictionary)
+        self.uniform_matrix = SparseTermSimilarityMatrix(self.index, self.dictionary)
+        self.vec1 = self.dictionary.doc2bow([u"government", u"government", u"denied"])
+        self.vec2 = self.dictionary.doc2bow([u"government", u"holiday"])
+
+    def test_empty_dictionary(self):
+        with self.assertRaises(ValueError):
+            SparseTermSimilarityMatrix(self.index, [])
 
     def test_type(self):
         """Test the type of the produced matrix."""
@@ -942,6 +951,29 @@ def test_symmetric(self):
             [0.0, 0.0, 0.0, 0.0, 1.0]])
         self.assertTrue(numpy.all(expected_matrix == matrix))
 
+    def test_dominant(self):
+        """Test the dominant parameter of the matrix constructor."""
+        negative_index = UniformTermSimilarityIndex(self.dictionary, term_similarity=-0.5)
+        matrix = SparseTermSimilarityMatrix(
+            negative_index, self.dictionary, nonzero_limit=2).matrix.todense()
+        expected_matrix = numpy.array([
+            [1.0, -.5, -.5, 0.0, 0.0],
+            [-.5, 1.0, 0.0, -.5, 0.0],
+            [-.5, 0.0, 1.0, 0.0, 0.0],
+            [0.0, -.5, 0.0, 1.0, 0.0],
+            [0.0, 0.0, 0.0, 0.0, 1.0]])
+        self.assertTrue(numpy.all(expected_matrix == matrix))
+
+        matrix = SparseTermSimilarityMatrix(
+            negative_index, self.dictionary, nonzero_limit=2, dominant=True).matrix.todense()
+        expected_matrix = numpy.array([
+            [1.0, -.5, 0.0, 0.0, 0.0],
+            [-.5, 1.0, 0.0, 0.0, 0.0],
+            [0.0, 0.0, 1.0, 0.0, 0.0],
+            [0.0, 0.0, 0.0, 1.0, 0.0],
+            [0.0, 0.0, 0.0, 0.0, 1.0]])
+        self.assertTrue(numpy.all(expected_matrix == matrix))
+
     def test_positive_definite(self):
         """Test the positive_definite parameter of the matrix constructor."""
         negative_index = UniformTermSimilarityIndex(self.dictionary, term_similarity=-0.5)
@@ -1004,109 +1036,533 @@ def test_encapsulation(self):
         self.assertTrue(isinstance(matrix, scipy.sparse.csc_matrix))
         self.assertTrue(numpy.all(matrix.todense() == expected_matrix))
 
-    def test_inner_product(self):
-        """Test the inner product."""
+    def test_inner_product_zerovector_zerovector_default(self):
+        """Test the inner product between two zero vectors with the default normalization."""
 
-        matrix = SparseTermSimilarityMatrix(
-            UniformTermSimilarityIndex(self.dictionary, term_similarity=0.5), self.dictionary)
+        self.assertEqual(0.0, self.uniform_matrix.inner_product([], []))
+
+    def test_inner_product_zerovector_zerovector_false_maintain(self):
+        """Test the inner product between two zero vectors with the (False, 'maintain') normalization."""
+
+        self.assertEqual(0.0, self.uniform_matrix.inner_product([], [], normalized=(False, 'maintain')))
+
+    def test_inner_product_zerovector_zerovector_false_true(self):
+        """Test the inner product between two zero vectors with the (False, True) normalization."""
+
+        self.assertEqual(0.0, self.uniform_matrix.inner_product([], [], normalized=(False, True)))
+
+    def test_inner_product_zerovector_zerovector_maintain_false(self):
+        """Test the inner product between two zero vectors with the ('maintain', False) normalization."""
+
+        self.assertEqual(0.0, self.uniform_matrix.inner_product([], [], normalized=('maintain', False)))
+
+    def test_inner_product_zerovector_zerovector_maintain_maintain(self):
+        """Test the inner product between two zero vectors with the ('maintain', 'maintain') normalization."""
+
+        self.assertEqual(0.0, self.uniform_matrix.inner_product([], [], normalized=('maintain', 'maintain')))
+
+    def test_inner_product_zerovector_zerovector_maintain_true(self):
+        """Test the inner product between two zero vectors with the ('maintain', True) normalization."""
+
+        self.assertEqual(0.0, self.uniform_matrix.inner_product([], [], normalized=('maintain', True)))
+
+    def test_inner_product_zerovector_zerovector_true_false(self):
+        """Test the inner product between two zero vectors with the (True, False) normalization."""
+
+        self.assertEqual(0.0, self.uniform_matrix.inner_product([], [], normalized=(True, False)))
+
+    def test_inner_product_zerovector_zerovector_true_maintain(self):
+        """Test the inner product between two zero vectors with the (True, 'maintain') normalization."""
+
+        self.assertEqual(0.0, self.uniform_matrix.inner_product([], [], normalized=(True, 'maintain')))
+
+    def test_inner_product_zerovector_zerovector_true_true(self):
+        """Test the inner product between two zero vectors with the (True, True) normalization."""
+
+        self.assertEqual(0.0, self.uniform_matrix.inner_product([], [], normalized=(True, True)))
+
+    def test_inner_product_zerovector_vector_default(self):
+        """Test the inner product between a zero vector and a vector with the default normalization."""
+
+        self.assertEqual(0.0, self.uniform_matrix.inner_product([], self.vec2))
+
+    def test_inner_product_zerovector_vector_false_maintain(self):
+        """Test the inner product between a zero vector and a vector with the (False, 'maintain') normalization."""
+
+        self.assertEqual(0.0, self.uniform_matrix.inner_product([], self.vec2, normalized=(False, 'maintain')))
+
+    def test_inner_product_zerovector_vector_false_true(self):
+        """Test the inner product between a zero vector and a vector with the (False, True) normalization."""
+
+        self.assertEqual(0.0, self.uniform_matrix.inner_product([], self.vec2, normalized=(False, True)))
+
+    def test_inner_product_zerovector_vector_maintain_false(self):
+        """Test the inner product between a zero vector and a vector with the ('maintain', False) normalization."""
+
+        self.assertEqual(0.0, self.uniform_matrix.inner_product([], self.vec2, normalized=('maintain', False)))
+
+    def test_inner_product_zerovector_vector_maintain_maintain(self):
+        """Test the inner product between a zero vector and a vector with the ('maintain', 'maintain') normalization."""
+
+        self.assertEqual(0.0, self.uniform_matrix.inner_product([], self.vec2, normalized=('maintain', 'maintain')))
+
+    def test_inner_product_zerovector_vector_maintain_true(self):
+        """Test the inner product between a zero vector and a vector with the ('maintain', True) normalization."""
+
+        self.assertEqual(0.0, self.uniform_matrix.inner_product([], self.vec2, normalized=('maintain', True)))
+
+    def test_inner_product_zerovector_vector_true_false(self):
+        """Test the inner product between a zero vector and a vector with the (True, False) normalization."""
+
+        self.assertEqual(0.0, self.uniform_matrix.inner_product([], self.vec2, normalized=(True, False)))
 
-        # check zero vectors work as expected
-        vec1 = self.dictionary.doc2bow([u"government", u"government", u"denied"])
-        vec2 = self.dictionary.doc2bow([u"government", u"holiday"])
+    def test_inner_product_zerovector_vector_true_maintain(self):
+        """Test the inner product between a zero vector and a vector with the (True, 'maintain') normalization."""
 
-        self.assertEqual(0.0, matrix.inner_product([], vec2))
-        self.assertEqual(0.0, matrix.inner_product(vec1, []))
-        self.assertEqual(0.0, matrix.inner_product([], []))
+        self.assertEqual(0.0, self.uniform_matrix.inner_product([], self.vec2, normalized=(True, 'maintain')))
 
-        self.assertEqual(0.0, matrix.inner_product([], vec2, normalized=True))
-        self.assertEqual(0.0, matrix.inner_product(vec1, [], normalized=True))
-        self.assertEqual(0.0, matrix.inner_product([], [], normalized=True))
+    def test_inner_product_zerovector_vector_true_true(self):
+        """Test the inner product between a zero vector and a vector with the (True, True) normalization."""
+
+        self.assertEqual(0.0, self.uniform_matrix.inner_product([], self.vec2, normalized=(True, True)))
+
+    def test_inner_product_vector_zerovector_default(self):
+        """Test the inner product between a vector and a zero vector with the default normalization."""
+
+        self.assertEqual(0.0, self.uniform_matrix.inner_product(self.vec1, []))
+
+    def test_inner_product_vector_zerovector_false_maintain(self):
+        """Test the inner product between a vector and a zero vector with the (False, 'maintain') normalization."""
+
+        self.assertEqual(0.0, self.uniform_matrix.inner_product(self.vec1, [], normalized=(False, 'maintain')))
+
+    def test_inner_product_vector_zerovector_false_true(self):
+        """Test the inner product between a vector and a zero vector with the (False, True) normalization."""
+
+        self.assertEqual(0.0, self.uniform_matrix.inner_product(self.vec1, [], normalized=(False, True)))
+
+    def test_inner_product_vector_zerovector_maintain_false(self):
+        """Test the inner product between a vector and a zero vector with the ('maintain', False) normalization."""
+
+        self.assertEqual(0.0, self.uniform_matrix.inner_product(self.vec1, [], normalized=('maintain', False)))
+
+    def test_inner_product_vector_zerovector_maintain_maintain(self):
+        """Test the inner product between a vector and a zero vector with the ('maintain', 'maintain') normalization."""
+
+        self.assertEqual(0.0, self.uniform_matrix.inner_product(self.vec1, [], normalized=('maintain', 'maintain')))
+
+    def test_inner_product_vector_zerovector_maintain_true(self):
+        """Test the inner product between a vector and a zero vector with the ('maintain', True) normalization."""
+
+        self.assertEqual(0.0, self.uniform_matrix.inner_product(self.vec1, [], normalized=('maintain', True)))
+
+    def test_inner_product_vector_zerovector_true_false(self):
+        """Test the inner product between a vector and a zero vector with the (True, False) normalization."""
+
+        self.assertEqual(0.0, self.uniform_matrix.inner_product(self.vec1, [], normalized=(True, False)))
+
+    def test_inner_product_vector_zerovector_true_maintain(self):
+        """Test the inner product between a vector and a zero vector with the (True, 'maintain') normalization."""
+
+        self.assertEqual(0.0, self.uniform_matrix.inner_product(self.vec1, [], normalized=(True, 'maintain')))
+
+    def test_inner_product_vector_zerovector_true_true(self):
+        """Test the inner product between a vector and a zero vector with the (True, True) normalization."""
+
+        self.assertEqual(0.0, self.uniform_matrix.inner_product(self.vec1, [], normalized=(True, True)))
+
+    def test_inner_product_vector_vector_default(self):
+        """Test the inner product between two vectors with the default normalization."""
 
-        # check that real-world vectors work as expected
-        vec1 = self.dictionary.doc2bow([u"government", u"government", u"denied"])
-        vec2 = self.dictionary.doc2bow([u"government", u"holiday"])
         expected_result = 0.0
         expected_result += 2 * 1.0 * 1  # government * s_{ij} * government
         expected_result += 2 * 0.5 * 1  # government * s_{ij} * holiday
         expected_result += 1 * 0.5 * 1  # denied * s_{ij} * government
         expected_result += 1 * 0.5 * 1  # denied * s_{ij} * holiday
-        result = matrix.inner_product(vec1, vec2)
+        result = self.uniform_matrix.inner_product(self.vec1, self.vec2)
         self.assertAlmostEqual(expected_result, result, places=5)
 
-        vec1 = self.dictionary.doc2bow([u"government", u"government", u"denied"])
-        vec2 = self.dictionary.doc2bow([u"government", u"holiday"])
-        expected_result = matrix.inner_product(vec1, vec2)
-        expected_result /= math.sqrt(matrix.inner_product(vec1, vec1))
-        expected_result /= math.sqrt(matrix.inner_product(vec2, vec2))
-        result = matrix.inner_product(vec1, vec2, normalized=True)
+    def test_inner_product_vector_vector_false_maintain(self):
+        """Test the inner product between two vectors with the (False, 'maintain') normalization."""
+
+        expected_result = self.uniform_matrix.inner_product(self.vec1, self.vec2)
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec2, self.vec2))
+        expected_result *= math.sqrt(self.identity_matrix.inner_product(self.vec2, self.vec2))
+        result = self.uniform_matrix.inner_product(self.vec1, self.vec2, normalized=(False, 'maintain'))
+        self.assertAlmostEqual(expected_result, result, places=5)
+
+    def test_inner_product_vector_vector_false_true(self):
+        """Test the inner product between two vectors with the (False, True) normalization."""
+
+        expected_result = self.uniform_matrix.inner_product(self.vec1, self.vec2)
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec2, self.vec2))
+        result = self.uniform_matrix.inner_product(self.vec1, self.vec2, normalized=(False, True))
         self.assertAlmostEqual(expected_result, result, places=5)
 
-        # check that real-world (vector, corpus) pairs work as expected
-        vec1 = self.dictionary.doc2bow([u"government", u"government", u"denied"])
-        vec2 = self.dictionary.doc2bow([u"government", u"holiday"])
+    def test_inner_product_vector_vector_maintain_false(self):
+        """Test the inner product between two vectors with the ('maintain', False) normalization."""
+
+        expected_result = self.uniform_matrix.inner_product(self.vec1, self.vec2)
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec1, self.vec1))
+        expected_result *= math.sqrt(self.identity_matrix.inner_product(self.vec1, self.vec1))
+        result = self.uniform_matrix.inner_product(self.vec1, self.vec2, normalized=('maintain', False))
+        self.assertAlmostEqual(expected_result, result, places=5)
+
+    def test_inner_product_vector_vector_maintain_maintain(self):
+        """Test the inner product between two vectors with the ('maintain', 'maintain') normalization."""
+
+        expected_result = self.uniform_matrix.inner_product(self.vec1, self.vec2)
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec1, self.vec1))
+        expected_result *= math.sqrt(self.identity_matrix.inner_product(self.vec1, self.vec1))
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec2, self.vec2))
+        expected_result *= math.sqrt(self.identity_matrix.inner_product(self.vec2, self.vec2))
+        result = self.uniform_matrix.inner_product(self.vec1, self.vec2, normalized=('maintain', 'maintain'))
+        self.assertAlmostEqual(expected_result, result, places=5)
+
+    def test_inner_product_vector_vector_maintain_true(self):
+        """Test the inner product between two vectors with the ('maintain', True) normalization."""
+
+        expected_result = self.uniform_matrix.inner_product(self.vec1, self.vec2)
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec1, self.vec1))
+        expected_result *= math.sqrt(self.identity_matrix.inner_product(self.vec1, self.vec1))
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec2, self.vec2))
+        result = self.uniform_matrix.inner_product(self.vec1, self.vec2, normalized=('maintain', True))
+        self.assertAlmostEqual(expected_result, result, places=5)
+
+    def test_inner_product_vector_vector_true_false(self):
+        """Test the inner product between two vectors with the (True, False) normalization."""
+
+        expected_result = self.uniform_matrix.inner_product(self.vec1, self.vec2)
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec1, self.vec1))
+        result = self.uniform_matrix.inner_product(self.vec1, self.vec2, normalized=(True, False))
+        self.assertAlmostEqual(expected_result, result, places=5)
+
+    def test_inner_product_vector_vector_true_maintain(self):
+        """Test the inner product between two vectors with the (True, 'maintain') normalization."""
+
+        expected_result = self.uniform_matrix.inner_product(self.vec1, self.vec2)
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec1, self.vec1))
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec2, self.vec2))
+        expected_result *= math.sqrt(self.identity_matrix.inner_product(self.vec2, self.vec2))
+        result = self.uniform_matrix.inner_product(self.vec1, self.vec2, normalized=(True, 'maintain'))
+        self.assertAlmostEqual(expected_result, result, places=5)
+
+    def test_inner_product_vector_vector_true_true(self):
+        """Test the inner product between two vectors with the (True, True) normalization."""
+
+        expected_result = self.uniform_matrix.inner_product(self.vec1, self.vec2)
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec1, self.vec1))
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec2, self.vec2))
+        result = self.uniform_matrix.inner_product(self.vec1, self.vec2, normalized=(True, True))
+        self.assertAlmostEqual(expected_result, result, places=5)
+
+    def test_inner_product_vector_corpus_default(self):
+        """Test the inner product between a vector and a corpus with the default normalization."""
+
         expected_result = 0.0
         expected_result += 2 * 1.0 * 1  # government * s_{ij} * government
         expected_result += 2 * 0.5 * 1  # government * s_{ij} * holiday
         expected_result += 1 * 0.5 * 1  # denied * s_{ij} * government
         expected_result += 1 * 0.5 * 1  # denied * s_{ij} * holiday
         expected_result = numpy.full((1, 2), expected_result)
-        result = matrix.inner_product(vec1, [vec2] * 2)
+        result = self.uniform_matrix.inner_product(self.vec1, [self.vec2] * 2)
+        self.assertTrue(isinstance(result, numpy.ndarray))
+        self.assertTrue(numpy.allclose(expected_result, result))
+
+    def test_inner_product_vector_corpus_false_maintain(self):
+        """Test the inner product between a vector and a corpus with the (False, 'maintain') normalization."""
+
+        expected_result = self.uniform_matrix.inner_product(self.vec1, self.vec2)
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec2, self.vec2))
+        expected_result *= math.sqrt(self.identity_matrix.inner_product(self.vec2, self.vec2))
+        expected_result = numpy.full((1, 2), expected_result)
+        result = self.uniform_matrix.inner_product(self.vec1, [self.vec2] * 2, normalized=(False, 'maintain'))
         self.assertTrue(isinstance(result, numpy.ndarray))
         self.assertTrue(numpy.allclose(expected_result, result))
 
-        vec1 = self.dictionary.doc2bow([u"government", u"government", u"denied"])
-        vec2 = self.dictionary.doc2bow([u"government", u"holiday"])
-        expected_result = matrix.inner_product(vec1, vec2)
-        expected_result /= math.sqrt(matrix.inner_product(vec1, vec1))
-        expected_result /= math.sqrt(matrix.inner_product(vec2, vec2))
+    def test_inner_product_vector_corpus_false_true(self):
+        """Test the inner product between a vector and a corpus with the (False, True) normalization."""
+
+        expected_result = self.uniform_matrix.inner_product(self.vec1, self.vec2)
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec2, self.vec2))
         expected_result = numpy.full((1, 2), expected_result)
-        result = matrix.inner_product(vec1, [vec2] * 2, normalized=True)
+        result = self.uniform_matrix.inner_product(self.vec1, [self.vec2] * 2, normalized=(False, True))
         self.assertTrue(isinstance(result, numpy.ndarray))
         self.assertTrue(numpy.allclose(expected_result, result))
 
-        # check that real-world (corpus, vector) pairs work as expected
-        vec1 = self.dictionary.doc2bow([u"government", u"government", u"denied"])
-        vec2 = self.dictionary.doc2bow([u"government", u"holiday"])
+    def test_inner_product_vector_corpus_maintain_false(self):
+        """Test the inner product between a vector and a corpus with the ('maintain', False) normalization."""
+
+        expected_result = self.uniform_matrix.inner_product(self.vec1, self.vec2)
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec1, self.vec1))
+        expected_result *= math.sqrt(self.identity_matrix.inner_product(self.vec1, self.vec1))
+        expected_result = numpy.full((1, 2), expected_result)
+        result = self.uniform_matrix.inner_product(self.vec1, [self.vec2] * 2, normalized=('maintain', False))
+        self.assertTrue(isinstance(result, numpy.ndarray))
+        self.assertTrue(numpy.allclose(expected_result, result))
+
+    def test_inner_product_vector_corpus_maintain_maintain(self):
+        """Test the inner product between a vector and a corpus with the ('maintain', 'maintain') normalization."""
+
+        expected_result = self.uniform_matrix.inner_product(self.vec1, self.vec2)
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec1, self.vec1))
+        expected_result *= math.sqrt(self.identity_matrix.inner_product(self.vec1, self.vec1))
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec2, self.vec2))
+        expected_result *= math.sqrt(self.identity_matrix.inner_product(self.vec2, self.vec2))
+        expected_result = numpy.full((1, 2), expected_result)
+        result = self.uniform_matrix.inner_product(self.vec1, [self.vec2] * 2, normalized=('maintain', 'maintain'))
+        self.assertTrue(isinstance(result, numpy.ndarray))
+        self.assertTrue(numpy.allclose(expected_result, result))
+
+    def test_inner_product_vector_corpus_maintain_true(self):
+        """Test the inner product between a vector and a corpus with the ('maintain', True) normalization."""
+
+        expected_result = self.uniform_matrix.inner_product(self.vec1, self.vec2)
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec1, self.vec1))
+        expected_result *= math.sqrt(self.identity_matrix.inner_product(self.vec1, self.vec1))
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec2, self.vec2))
+        expected_result = numpy.full((1, 2), expected_result)
+        result = self.uniform_matrix.inner_product(self.vec1, [self.vec2] * 2, normalized=('maintain', True))
+        self.assertTrue(isinstance(result, numpy.ndarray))
+        self.assertTrue(numpy.allclose(expected_result, result))
+
+    def test_inner_product_vector_corpus_true_false(self):
+        """Test the inner product between a vector and a corpus with the (True, False) normalization."""
+
+        expected_result = self.uniform_matrix.inner_product(self.vec1, self.vec2)
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec1, self.vec1))
+        expected_result = numpy.full((1, 2), expected_result)
+        result = self.uniform_matrix.inner_product(self.vec1, [self.vec2] * 2, normalized=(True, False))
+        self.assertTrue(isinstance(result, numpy.ndarray))
+        self.assertTrue(numpy.allclose(expected_result, result))
+
+    def test_inner_product_vector_corpus_true_maintain(self):
+        """Test the inner product between a vector and a corpus with the (True, 'maintain') normalization."""
+
+        expected_result = self.uniform_matrix.inner_product(self.vec1, self.vec2)
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec1, self.vec1))
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec2, self.vec2))
+        expected_result *= math.sqrt(self.identity_matrix.inner_product(self.vec2, self.vec2))
+        expected_result = numpy.full((1, 2), expected_result)
+        result = self.uniform_matrix.inner_product(self.vec1, [self.vec2] * 2, normalized=(True, 'maintain'))
+        self.assertTrue(isinstance(result, numpy.ndarray))
+        self.assertTrue(numpy.allclose(expected_result, result))
+
+    def test_inner_product_vector_corpus_true_true(self):
+        """Test the inner product between a vector and a corpus with the (True, True) normalization."""
+
+        expected_result = self.uniform_matrix.inner_product(self.vec1, self.vec2)
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec1, self.vec1))
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec2, self.vec2))
+        expected_result = numpy.full((1, 2), expected_result)
+        result = self.uniform_matrix.inner_product(self.vec1, [self.vec2] * 2, normalized=(True, True))
+        self.assertTrue(isinstance(result, numpy.ndarray))
+        self.assertTrue(numpy.allclose(expected_result, result))
+
+    def test_inner_product_corpus_vector_default(self):
+        """Test the inner product between a corpus and a vector with the default normalization."""
+
         expected_result = 0.0
         expected_result += 2 * 1.0 * 1  # government * s_{ij} * government
         expected_result += 2 * 0.5 * 1  # government * s_{ij} * holiday
         expected_result += 1 * 0.5 * 1  # denied * s_{ij} * government
         expected_result += 1 * 0.5 * 1  # denied * s_{ij} * holiday
         expected_result = numpy.full((3, 1), expected_result)
-        result = matrix.inner_product([vec1] * 3, vec2)
+        result = self.uniform_matrix.inner_product([self.vec1] * 3, self.vec2)
+        self.assertTrue(isinstance(result, numpy.ndarray))
+        self.assertTrue(numpy.allclose(expected_result, result))
+
+    def test_inner_product_corpus_vector_false_maintain(self):
+        """Test the inner product between a corpus and a vector with the (False, 'maintain') normalization."""
+
+        expected_result = self.uniform_matrix.inner_product(self.vec1, self.vec2)
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec2, self.vec2))
+        expected_result *= math.sqrt(self.identity_matrix.inner_product(self.vec2, self.vec2))
+        expected_result = numpy.full((3, 1), expected_result)
+        result = self.uniform_matrix.inner_product([self.vec1] * 3, self.vec2, normalized=(False, 'maintain'))
+        self.assertTrue(isinstance(result, numpy.ndarray))
+        self.assertTrue(numpy.allclose(expected_result, result))
+
+    def test_inner_product_corpus_vector_false_true(self):
+        """Test the inner product between a corpus and a vector with the (False, True) normalization."""
+
+        expected_result = self.uniform_matrix.inner_product(self.vec1, self.vec2)
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec2, self.vec2))
+        expected_result = numpy.full((3, 1), expected_result)
+        result = self.uniform_matrix.inner_product([self.vec1] * 3, self.vec2, normalized=(False, True))
+        self.assertTrue(isinstance(result, numpy.ndarray))
+        self.assertTrue(numpy.allclose(expected_result, result))
+
+    def test_inner_product_corpus_vector_maintain_false(self):
+        """Test the inner product between a corpus and a vector with the ('maintain', False) normalization."""
+
+        expected_result = self.uniform_matrix.inner_product(self.vec1, self.vec2)
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec1, self.vec1))
+        expected_result *= math.sqrt(self.identity_matrix.inner_product(self.vec1, self.vec1))
+        expected_result = numpy.full((3, 1), expected_result)
+        result = self.uniform_matrix.inner_product([self.vec1] * 3, self.vec2, normalized=('maintain', False))
+        self.assertTrue(isinstance(result, numpy.ndarray))
+        self.assertTrue(numpy.allclose(expected_result, result))
+
+    def test_inner_product_corpus_vector_maintain_maintain(self):
+        """Test the inner product between a corpus and a vector with the ('maintain', 'maintain') normalization."""
+
+        expected_result = self.uniform_matrix.inner_product(self.vec1, self.vec2)
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec1, self.vec1))
+        expected_result *= math.sqrt(self.identity_matrix.inner_product(self.vec1, self.vec1))
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec2, self.vec2))
+        expected_result *= math.sqrt(self.identity_matrix.inner_product(self.vec2, self.vec2))
+        expected_result = numpy.full((3, 1), expected_result)
+        result = self.uniform_matrix.inner_product([self.vec1] * 3, self.vec2, normalized=('maintain', 'maintain'))
+        self.assertTrue(isinstance(result, numpy.ndarray))
+        self.assertTrue(numpy.allclose(expected_result, result))
+
+    def test_inner_product_corpus_vector_maintain_true(self):
+        """Test the inner product between a corpus and a vector with the ('maintain', True) normalization."""
+
+        expected_result = self.uniform_matrix.inner_product(self.vec1, self.vec2)
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec1, self.vec1))
+        expected_result *= math.sqrt(self.identity_matrix.inner_product(self.vec1, self.vec1))
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec2, self.vec2))
+        expected_result = numpy.full((3, 1), expected_result)
+        result = self.uniform_matrix.inner_product([self.vec1] * 3, self.vec2, normalized=('maintain', True))
+        self.assertTrue(isinstance(result, numpy.ndarray))
+        self.assertTrue(numpy.allclose(expected_result, result))
+
+    def test_inner_product_corpus_vector_true_false(self):
+        """Test the inner product between a corpus and a vector with the (True, False) normalization."""
+
+        expected_result = self.uniform_matrix.inner_product(self.vec1, self.vec2)
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec1, self.vec1))
+        expected_result = numpy.full((3, 1), expected_result)
+        result = self.uniform_matrix.inner_product([self.vec1] * 3, self.vec2, normalized=(True, False))
+        self.assertTrue(isinstance(result, numpy.ndarray))
+        self.assertTrue(numpy.allclose(expected_result, result))
+
+    def test_inner_product_corpus_vector_true_maintain(self):
+        """Test the inner product between a corpus and a vector with the (True, 'maintain') normalization."""
+
+        expected_result = self.uniform_matrix.inner_product(self.vec1, self.vec2)
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec1, self.vec1))
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec2, self.vec2))
+        expected_result *= math.sqrt(self.identity_matrix.inner_product(self.vec2, self.vec2))
+        expected_result = numpy.full((3, 1), expected_result)
+        result = self.uniform_matrix.inner_product([self.vec1] * 3, self.vec2, normalized=(True, 'maintain'))
         self.assertTrue(isinstance(result, numpy.ndarray))
         self.assertTrue(numpy.allclose(expected_result, result))
 
-        vec1 = self.dictionary.doc2bow([u"government", u"government", u"denied"])
-        vec2 = self.dictionary.doc2bow([u"government", u"holiday"])
-        expected_result = matrix.inner_product(vec1, vec2)
-        expected_result /= math.sqrt(matrix.inner_product(vec1, vec1))
-        expected_result /= math.sqrt(matrix.inner_product(vec2, vec2))
+    def test_inner_product_corpus_vector_true_true(self):
+        """Test the inner product between a corpus and a vector with the (True, True) normalization."""
+
+        expected_result = self.uniform_matrix.inner_product(self.vec1, self.vec2)
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec1, self.vec1))
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec2, self.vec2))
         expected_result = numpy.full((3, 1), expected_result)
-        result = matrix.inner_product([vec1] * 3, vec2, normalized=True)
+        result = self.uniform_matrix.inner_product([self.vec1] * 3, self.vec2, normalized=(True, True))
         self.assertTrue(isinstance(result, numpy.ndarray))
         self.assertTrue(numpy.allclose(expected_result, result))
 
-        # check that real-world corpora work as expected
-        vec1 = self.dictionary.doc2bow([u"government", u"government", u"denied"])
-        vec2 = self.dictionary.doc2bow([u"government", u"holiday"])
+    def test_inner_product_corpus_corpus_default(self):
+        """Test the inner product between two corpora with the default normalization."""
+
         expected_result = 0.0
         expected_result += 2 * 1.0 * 1  # government * s_{ij} * government
         expected_result += 2 * 0.5 * 1  # government * s_{ij} * holiday
         expected_result += 1 * 0.5 * 1  # denied * s_{ij} * government
         expected_result += 1 * 0.5 * 1  # denied * s_{ij} * holiday
         expected_result = numpy.full((3, 2), expected_result)
-        result = matrix.inner_product([vec1] * 3, [vec2] * 2)
+        result = self.uniform_matrix.inner_product([self.vec1] * 3, [self.vec2] * 2)
+        self.assertTrue(isinstance(result, scipy.sparse.csr_matrix))
+        self.assertTrue(numpy.allclose(expected_result, result.todense()))
+
+    def test_inner_product_corpus_corpus_false_maintain(self):
+        """Test the inner product between two corpora with the (False, 'maintain') normalization."""
+
+        expected_result = self.uniform_matrix.inner_product(self.vec1, self.vec2)
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec2, self.vec2))
+        expected_result *= math.sqrt(self.identity_matrix.inner_product(self.vec2, self.vec2))
+        expected_result = numpy.full((3, 2), expected_result)
+        result = self.uniform_matrix.inner_product([self.vec1] * 3, [self.vec2] * 2, normalized=(False, 'maintain'))
         self.assertTrue(isinstance(result, scipy.sparse.csr_matrix))
         self.assertTrue(numpy.allclose(expected_result, result.todense()))
 
-        vec1 = self.dictionary.doc2bow([u"government", u"government", u"denied"])
-        vec2 = self.dictionary.doc2bow([u"government", u"holiday"])
-        expected_result = matrix.inner_product(vec1, vec2)
-        expected_result /= math.sqrt(matrix.inner_product(vec1, vec1))
-        expected_result /= math.sqrt(matrix.inner_product(vec2, vec2))
+    def test_inner_product_corpus_corpus_false_true(self):
+        """Test the inner product between two corpora with the (False, True) normalization."""
+
+        expected_result = self.uniform_matrix.inner_product(self.vec1, self.vec2)
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec2, self.vec2))
+        expected_result = numpy.full((3, 2), expected_result)
+        result = self.uniform_matrix.inner_product([self.vec1] * 3, [self.vec2] * 2, normalized=(False, True))
+        self.assertTrue(isinstance(result, scipy.sparse.csr_matrix))
+        self.assertTrue(numpy.allclose(expected_result, result.todense()))
+
+    def test_inner_product_corpus_corpus_maintain_false(self):
+        """Test the inner product between two corpora with the ('maintain', False) normalization."""
+
+        expected_result = self.uniform_matrix.inner_product(self.vec1, self.vec2)
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec1, self.vec1))
+        expected_result *= math.sqrt(self.identity_matrix.inner_product(self.vec1, self.vec1))
+        expected_result = numpy.full((3, 2), expected_result)
+        result = self.uniform_matrix.inner_product([self.vec1] * 3, [self.vec2] * 2, normalized=('maintain', False))
+        self.assertTrue(isinstance(result, scipy.sparse.csr_matrix))
+        self.assertTrue(numpy.allclose(expected_result, result.todense()))
+
+    def test_inner_product_corpus_corpus_maintain_maintain(self):
+        """Test the inner product between two corpora with the ('maintain', 'maintain') normalization."""
+
+        expected_result = self.uniform_matrix.inner_product(self.vec1, self.vec2)
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec1, self.vec1))
+        expected_result *= math.sqrt(self.identity_matrix.inner_product(self.vec1, self.vec1))
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec2, self.vec2))
+        expected_result *= math.sqrt(self.identity_matrix.inner_product(self.vec2, self.vec2))
+        expected_result = numpy.full((3, 2), expected_result)
+        result = self.uniform_matrix.inner_product([self.vec1] * 3, [self.vec2] * 2,
+            normalized=('maintain', 'maintain'))
+        self.assertTrue(isinstance(result, scipy.sparse.csr_matrix))
+        self.assertTrue(numpy.allclose(expected_result, result.todense()))
+
+    def test_inner_product_corpus_corpus_maintain_true(self):
+        """Test the inner product between two corpora with the ('maintain', True) normalization."""
+
+        expected_result = self.uniform_matrix.inner_product(self.vec1, self.vec2)
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec1, self.vec1))
+        expected_result *= math.sqrt(self.identity_matrix.inner_product(self.vec1, self.vec1))
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec2, self.vec2))
+        expected_result = numpy.full((3, 2), expected_result)
+        result = self.uniform_matrix.inner_product([self.vec1] * 3, [self.vec2] * 2, normalized=('maintain', True))
+        self.assertTrue(isinstance(result, scipy.sparse.csr_matrix))
+        self.assertTrue(numpy.allclose(expected_result, result.todense()))
+
+    def test_inner_product_corpus_corpus_true_false(self):
+        """Test the inner product between two corpora with the (True, False) normalization."""
+
+        expected_result = self.uniform_matrix.inner_product(self.vec1, self.vec2)
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec1, self.vec1))
+        expected_result = numpy.full((3, 2), expected_result)
+        result = self.uniform_matrix.inner_product([self.vec1] * 3, [self.vec2] * 2, normalized=(True, False))
+        self.assertTrue(isinstance(result, scipy.sparse.csr_matrix))
+        self.assertTrue(numpy.allclose(expected_result, result.todense()))
+
+    def test_inner_product_corpus_corpus_true_maintain(self):
+        """Test the inner product between two corpora with the (True, 'maintain') normalization."""
+
+        expected_result = self.uniform_matrix.inner_product(self.vec1, self.vec2)
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec1, self.vec1))
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec2, self.vec2))
+        expected_result *= math.sqrt(self.identity_matrix.inner_product(self.vec2, self.vec2))
+        expected_result = numpy.full((3, 2), expected_result)
+        result = self.uniform_matrix.inner_product([self.vec1] * 3, [self.vec2] * 2, normalized=(True, 'maintain'))
+        self.assertTrue(isinstance(result, scipy.sparse.csr_matrix))
+        self.assertTrue(numpy.allclose(expected_result, result.todense()))
+
+    def test_inner_product_corpus_corpus_true_true(self):
+        """Test the inner product between two corpora with the (True, True) normalization."""
+
+        expected_result = self.uniform_matrix.inner_product(self.vec1, self.vec2)
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec1, self.vec1))
+        expected_result /= math.sqrt(self.uniform_matrix.inner_product(self.vec2, self.vec2))
         expected_result = numpy.full((3, 2), expected_result)
-        result = matrix.inner_product([vec1] * 3, [vec2] * 2, normalized=True)
+        result = self.uniform_matrix.inner_product([self.vec1] * 3, [self.vec2] * 2, normalized=(True, True))
         self.assertTrue(isinstance(result, scipy.sparse.csr_matrix))
         self.assertTrue(numpy.allclose(expected_result, result.todense()))