diff --git a/docs/src/auto_examples/index.rst b/docs/src/auto_examples/index.rst
index cbe06b6fb5..ca3c1ec019 100644
--- a/docs/src/auto_examples/index.rst
+++ b/docs/src/auto_examples/index.rst
@@ -13,7 +13,7 @@ If you're thinking about contributing documentation, please see :ref:`sphx_glr_a
.. raw:: html
-
+
@@ -33,9 +33,10 @@ Understanding this functionality is vital for using gensim effectively.
.. only:: html
- .. figure:: /auto_examples/core/images/thumb/sphx_glr_run_core_concepts_thumb.png
+ .. figure:: /auto_examples/core/images/thumb/sphx_glr_run_core_concepts_thumb.png
+ :alt: Core Concepts
- :ref:`sphx_glr_auto_examples_core_run_core_concepts.py`
+ :ref:`sphx_glr_auto_examples_core_run_core_concepts.py`
.. raw:: html
@@ -53,9 +54,10 @@ Understanding this functionality is vital for using gensim effectively.
.. only:: html
- .. figure:: /auto_examples/core/images/thumb/sphx_glr_run_corpora_and_vector_spaces_thumb.png
+ .. figure:: /auto_examples/core/images/thumb/sphx_glr_run_corpora_and_vector_spaces_thumb.png
+ :alt: Corpora and Vector Spaces
- :ref:`sphx_glr_auto_examples_core_run_corpora_and_vector_spaces.py`
+ :ref:`sphx_glr_auto_examples_core_run_corpora_and_vector_spaces.py`
.. raw:: html
@@ -73,9 +75,10 @@ Understanding this functionality is vital for using gensim effectively.
.. only:: html
- .. figure:: /auto_examples/core/images/thumb/sphx_glr_run_topics_and_transformations_thumb.png
+ .. figure:: /auto_examples/core/images/thumb/sphx_glr_run_topics_and_transformations_thumb.png
+ :alt: Topics and Transformations
- :ref:`sphx_glr_auto_examples_core_run_topics_and_transformations.py`
+ :ref:`sphx_glr_auto_examples_core_run_topics_and_transformations.py`
.. raw:: html
@@ -93,9 +96,10 @@ Understanding this functionality is vital for using gensim effectively.
.. only:: html
- .. figure:: /auto_examples/core/images/thumb/sphx_glr_run_similarity_queries_thumb.png
+ .. figure:: /auto_examples/core/images/thumb/sphx_glr_run_similarity_queries_thumb.png
+ :alt: Similarity Queries
- :ref:`sphx_glr_auto_examples_core_run_similarity_queries.py`
+ :ref:`sphx_glr_auto_examples_core_run_similarity_queries.py`
.. raw:: html
@@ -108,7 +112,7 @@ Understanding this functionality is vital for using gensim effectively.
/auto_examples/core/run_similarity_queries
.. raw:: html
-
+
@@ -127,9 +131,10 @@ Learning-oriented lessons that introduce a particular gensim feature, e.g. a mod
.. only:: html
- .. figure:: /auto_examples/tutorials/images/thumb/sphx_glr_run_word2vec_thumb.png
+ .. figure:: /auto_examples/tutorials/images/thumb/sphx_glr_run_word2vec_thumb.png
+ :alt: Word2Vec Model
- :ref:`sphx_glr_auto_examples_tutorials_run_word2vec.py`
+ :ref:`sphx_glr_auto_examples_tutorials_run_word2vec.py`
.. raw:: html
@@ -147,9 +152,10 @@ Learning-oriented lessons that introduce a particular gensim feature, e.g. a mod
.. only:: html
- .. figure:: /auto_examples/tutorials/images/thumb/sphx_glr_run_doc2vec_lee_thumb.png
+ .. figure:: /auto_examples/tutorials/images/thumb/sphx_glr_run_doc2vec_lee_thumb.png
+ :alt: Doc2Vec Model
- :ref:`sphx_glr_auto_examples_tutorials_run_doc2vec_lee.py`
+ :ref:`sphx_glr_auto_examples_tutorials_run_doc2vec_lee.py`
.. raw:: html
@@ -167,9 +173,10 @@ Learning-oriented lessons that introduce a particular gensim feature, e.g. a mod
.. only:: html
- .. figure:: /auto_examples/tutorials/images/thumb/sphx_glr_run_fasttext_thumb.png
+ .. figure:: /auto_examples/tutorials/images/thumb/sphx_glr_run_fasttext_thumb.png
+ :alt: FastText Model
- :ref:`sphx_glr_auto_examples_tutorials_run_fasttext.py`
+ :ref:`sphx_glr_auto_examples_tutorials_run_fasttext.py`
.. raw:: html
@@ -187,9 +194,10 @@ Learning-oriented lessons that introduce a particular gensim feature, e.g. a mod
.. only:: html
- .. figure:: /auto_examples/tutorials/images/thumb/sphx_glr_run_annoy_thumb.png
+ .. figure:: /auto_examples/tutorials/images/thumb/sphx_glr_run_annoy_thumb.png
+ :alt: Fast Similarity Queries with Annoy and Word2Vec
- :ref:`sphx_glr_auto_examples_tutorials_run_annoy.py`
+ :ref:`sphx_glr_auto_examples_tutorials_run_annoy.py`
.. raw:: html
@@ -207,9 +215,10 @@ Learning-oriented lessons that introduce a particular gensim feature, e.g. a mod
.. only:: html
- .. figure:: /auto_examples/tutorials/images/thumb/sphx_glr_run_lda_thumb.png
+ .. figure:: /auto_examples/tutorials/images/thumb/sphx_glr_run_lda_thumb.png
+ :alt: LDA Model
- :ref:`sphx_glr_auto_examples_tutorials_run_lda.py`
+ :ref:`sphx_glr_auto_examples_tutorials_run_lda.py`
.. raw:: html
@@ -227,9 +236,10 @@ Learning-oriented lessons that introduce a particular gensim feature, e.g. a mod
.. only:: html
- .. figure:: /auto_examples/tutorials/images/thumb/sphx_glr_run_wmd_thumb.png
+ .. figure:: /auto_examples/tutorials/images/thumb/sphx_glr_run_wmd_thumb.png
+ :alt: Word Mover's Distance
- :ref:`sphx_glr_auto_examples_tutorials_run_wmd.py`
+ :ref:`sphx_glr_auto_examples_tutorials_run_wmd.py`
.. raw:: html
@@ -242,7 +252,7 @@ Learning-oriented lessons that introduce a particular gensim feature, e.g. a mod
/auto_examples/tutorials/run_wmd
.. raw:: html
-
+
@@ -261,9 +271,10 @@ These **goal-oriented guides** demonstrate how to **solve a specific problem** u
.. only:: html
- .. figure:: /auto_examples/howtos/images/thumb/sphx_glr_run_downloader_api_thumb.png
+ .. figure:: /auto_examples/howtos/images/thumb/sphx_glr_run_downloader_api_thumb.png
+ :alt: How to download pre-trained models and corpora
- :ref:`sphx_glr_auto_examples_howtos_run_downloader_api.py`
+ :ref:`sphx_glr_auto_examples_howtos_run_downloader_api.py`
.. raw:: html
@@ -281,9 +292,10 @@ These **goal-oriented guides** demonstrate how to **solve a specific problem** u
.. only:: html
- .. figure:: /auto_examples/howtos/images/thumb/sphx_glr_run_doc_thumb.png
+ .. figure:: /auto_examples/howtos/images/thumb/sphx_glr_run_doc_thumb.png
+ :alt: How to Author Gensim Documentation
- :ref:`sphx_glr_auto_examples_howtos_run_doc.py`
+ :ref:`sphx_glr_auto_examples_howtos_run_doc.py`
.. raw:: html
@@ -301,9 +313,10 @@ These **goal-oriented guides** demonstrate how to **solve a specific problem** u
.. only:: html
- .. figure:: /auto_examples/howtos/images/thumb/sphx_glr_run_doc2vec_imdb_thumb.png
+ .. figure:: /auto_examples/howtos/images/thumb/sphx_glr_run_doc2vec_imdb_thumb.png
+ :alt: How to reproduce the doc2vec 'Paragraph Vector' paper
- :ref:`sphx_glr_auto_examples_howtos_run_doc2vec_imdb.py`
+ :ref:`sphx_glr_auto_examples_howtos_run_doc2vec_imdb.py`
.. raw:: html
@@ -321,9 +334,10 @@ These **goal-oriented guides** demonstrate how to **solve a specific problem** u
.. only:: html
- .. figure:: /auto_examples/howtos/images/thumb/sphx_glr_run_compare_lda_thumb.png
+ .. figure:: /auto_examples/howtos/images/thumb/sphx_glr_run_compare_lda_thumb.png
+ :alt: How to Compare LDA Models
- :ref:`sphx_glr_auto_examples_howtos_run_compare_lda.py`
+ :ref:`sphx_glr_auto_examples_howtos_run_compare_lda.py`
.. raw:: html
@@ -336,7 +350,7 @@ These **goal-oriented guides** demonstrate how to **solve a specific problem** u
/auto_examples/howtos/run_compare_lda
.. raw:: html
-
+
@@ -379,7 +393,7 @@ Blog posts, tutorial videos, hackathons and other useful Gensim resources, from
.. raw:: html
-
+
@@ -389,15 +403,15 @@ Blog posts, tutorial videos, hackathons and other useful Gensim resources, from
:class: sphx-glr-footer-gallery
- .. container:: sphx-glr-download
+ .. container:: sphx-glr-download sphx-glr-download-python
- :download:`Download all examples in Python source code: auto_examples_python.zip </home/misha/git/gensim/docs/src/auto_examples/auto_examples_python.zip>`
+ :download:`Download all examples in Python source code: auto_examples_python.zip </Volumes/work/workspace/gensim/trunk/docs/src/auto_examples/auto_examples_python.zip>`
- .. container:: sphx-glr-download
+ .. container:: sphx-glr-download sphx-glr-download-jupyter
- :download:`Download all examples in Jupyter notebooks: auto_examples_jupyter.zip </home/misha/git/gensim/docs/src/auto_examples/auto_examples_jupyter.zip>`
+ :download:`Download all examples in Jupyter notebooks: auto_examples_jupyter.zip </Volumes/work/workspace/gensim/trunk/docs/src/auto_examples/auto_examples_jupyter.zip>`
.. only:: html
diff --git a/gensim/models/phrases.py b/gensim/models/phrases.py
index 9460619db8..959604a4fc 100644
--- a/gensim/models/phrases.py
+++ b/gensim/models/phrases.py
@@ -11,7 +11,7 @@
* `Mikolov, et. al: "Distributed Representations of Words and Phrases and their Compositionality"
`_
-* `"Normalized (Pointwise) Mutual Information in Colocation Extraction" by Gerlof Bouma
+* `"Normalized (Pointwise) Mutual Information in Collocation Extraction" by Gerlof Bouma
`_
@@ -21,39 +21,42 @@
>>> from gensim.test.utils import datapath
>>> from gensim.models.word2vec import Text8Corpus
- >>> from gensim.models.phrases import Phrases, Phraser
+ >>> from gensim.models.phrases import Phrases
>>>
- >>> # Load training data.
+ >>> # Create training corpus. Must be a sequence of sentences (e.g. an iterable or a generator).
>>> sentences = Text8Corpus(datapath('testcorpus.txt'))
- >>> # The training corpus must be a sequence (stream, generator) of sentences,
- >>> # with each sentence a list of tokens:
- >>> print(list(sentences)[0][:10])
+ >>> # Each sentence must be a list of string tokens:
+ >>> first_sentence = next(iter(sentences))
+ >>> print(first_sentence[:10])
['computer', 'human', 'interface', 'computer', 'response', 'survey', 'system', 'time', 'user', 'interface']
>>>
- >>> # Train a toy bigram model.
- >>> phrases = Phrases(sentences, min_count=1, threshold=1)
+ >>> # Train a toy phrase model on our training corpus.
+ >>> phrase_model = Phrases(sentences, delimiter='_', min_count=1, threshold=1)
+ >>>
>>> # Apply the trained phrases model to a new, unseen sentence.
- >>> phrases[['trees', 'graph', 'minors']]
+ >>> new_sentence = ['trees', 'graph', 'minors']
+ >>> phrase_model[new_sentence]
['trees_graph', 'minors']
>>> # The toy model considered "trees graph" a single phrase => joined the two
- >>> # tokens into a single token, `trees_graph`.
+ >>> # tokens into a single "phrase" token, using our selected `_` delimiter.
+ >>>
+ >>> # Apply the trained model to each sentence of a corpus, using the same [] syntax:
+ >>> for sent in phrase_model[sentences]:
+ ... pass
>>>
>>> # Update the model with two new sentences on the fly.
- >>> phrases.add_vocab([["hello", "world"], ["meow"]])
+ >>> phrase_model.add_vocab([["hello", "world"], ["meow"]])
>>>
>>> # Export the trained model = use less RAM, faster processing. Model updates no longer possible.
- >>> bigram = Phraser(phrases)
- >>> bigram[['trees', 'graph', 'minors']] # apply the exported model to a sentence
+ >>> frozen_model = phrase_model.freeze()
+ >>> # Apply the frozen model; same results as before:
+ >>> frozen_model[new_sentence]
['trees_graph', 'minors']
>>>
- >>> # Apply the exported model to each sentence of a corpus:
- >>> for sent in bigram[sentences]:
- ... pass
- >>>
- >>> # Save / load an exported collocation model.
- >>> bigram.save("/tmp/my_bigram_model.pkl")
- >>> bigram_reloaded = Phraser.load("/tmp/my_bigram_model.pkl")
- >>> bigram_reloaded[['trees', 'graph', 'minors']] # apply the exported model to a sentence
+ >>> # Save / load models.
+ >>> frozen_model.save("/tmp/my_phrase_model.pkl")
+ >>> model_reloaded = Phrases.load("/tmp/my_phrase_model.pkl")
+ >>> model_reloaded[['trees', 'graph', 'minors']] # apply the reloaded model to a sentence
['trees_graph', 'minors']
"""
@@ -62,7 +65,6 @@
import os
import logging
from collections import defaultdict
-import functools
import itertools
from math import log
import pickle
@@ -73,6 +75,89 @@
logger = logging.getLogger(__name__)
+NEGATIVE_INFINITY = float('-inf')
+
+
+def original_scorer(worda_count, wordb_count, bigram_count, len_vocab, min_count, corpus_word_count):
+ r"""Bigram scoring function, based on the original `Mikolov, et. al: "Distributed Representations
+ of Words and Phrases and their Compositionality" `_.
+
+ Parameters
+ ----------
+ worda_count : int
+ Number of occurrences for first word.
+ wordb_count : int
+ Number of occurrences for second word.
+ bigram_count : int
+ Number of co-occurrences for phrase "worda_wordb".
+ len_vocab : int
+ Size of vocabulary.
+ min_count : int
+ Minimum collocation count threshold.
+ corpus_word_count : int
+ Not used in this particular scoring technique.
+
+ Returns
+ -------
+ float
+ Score for the given bigram. Can be negative, or ``float('-inf')`` if either word count is zero.
+
+ Notes
+ -----
+ Formula: :math:`\frac{(bigram\_count - min\_count) * len\_vocab }{ (worda\_count * wordb\_count)}`.
+
+ """
+ denom = worda_count * wordb_count
+ if denom == 0:
+ return NEGATIVE_INFINITY
+ return (bigram_count - min_count) / float(denom) * len_vocab
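For intuition, a quick worked example of the formula above. The counts are made up purely for illustration; only the arithmetic matters:

>>> from gensim.models.phrases import original_scorer
>>> original_scorer(worda_count=10, wordb_count=5, bigram_count=4,
...                 len_vocab=1000, min_count=1, corpus_word_count=0)  # corpus_word_count unused by this scorer
60.0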
+
+
+def npmi_scorer(worda_count, wordb_count, bigram_count, len_vocab, min_count, corpus_word_count):
+ r"""Calculation NPMI score based on `"Normalized (Pointwise) Mutual Information in Colocation Extraction"
+ by Gerlof Bouma `_.
+
+ Parameters
+ ----------
+ worda_count : int
+ Number of occurrences for first word.
+ wordb_count : int
+ Number of occurrences for second word.
+ bigram_count : int
+ Number of co-occurrences for phrase "worda_wordb".
+ len_vocab : int
+ Not used.
+ min_count : int
+ Ignore all bigrams with total collected count lower than this value.
+ corpus_word_count : int
+ Total number of words in the corpus.
+
+ Returns
+ -------
+ float
+ If bigram_count >= min_count, return the collocation score, in the range -1 to 1.
+ Otherwise return -inf.
+
+ Notes
+ -----
+ Formula: :math:`\frac{\ln(prob(word_a, word_b) / (prob(word_a) * prob(word_b)))}{-\ln(prob(word_a, word_b))}`,
+ where :math:`prob(word) = \frac{word\_count}{corpus\_word\_count}`.
+
+ """
+ if bigram_count >= min_count:
+ corpus_word_count = float(corpus_word_count)
+ pa = worda_count / corpus_word_count
+ pb = wordb_count / corpus_word_count
+ pab = bigram_count / corpus_word_count
+ try:
+ return log(pab / (pa * pb)) / -log(pab)
+ except ValueError: # some of the counts were zero => never a phrase
+ return NEGATIVE_INFINITY
+ else:
+ # Return -infinity to make sure that no phrases will be created
+ # from bigrams less frequent than min_count.
+ return NEGATIVE_INFINITY
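As a sanity check of the NPMI formula, a tiny hypothetical example: with prob(a)=0.05, prob(b)=0.04 and prob(a, b)=0.02, the score is ln(10) / -ln(0.02) ≈ 0.59:

>>> from gensim.models.phrases import npmi_scorer
>>> round(npmi_scorer(worda_count=50, wordb_count=40, bigram_count=20,
...                   len_vocab=0, min_count=5, corpus_word_count=1000), 2)
0.59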
+
def _is_single(obj):
"""Check whether `obj` is a single document or an entire corpus.
@@ -84,7 +169,8 @@ def _is_single(obj):
Return
------
(bool, object)
- (is_single, new) tuple, where `new` yields the same sequence as `obj`.
+ 2-tuple ``(is_single_document, new_obj)``, where `new_obj`
+ yields the same sequence as the original `obj`.
Notes
-----
@@ -97,217 +183,238 @@ def _is_single(obj):
peek = next(obj_iter)
obj_iter = itertools.chain([peek], obj_iter)
except StopIteration:
- # An empty object is a single document
+ # An empty object is interpreted as a single document (not a corpus).
return True, obj
if isinstance(peek, str):
- # It's a document, return the iterator
+ # First item is a string => obj is a single document for sure.
return True, obj_iter
if temp_iter is obj:
- # Checking for iterator to the object
+ # An iterator / generator => interpret input as a corpus.
return False, obj_iter
- else:
- # If the first item isn't a string, assume obj is a corpus
- return False, obj
+ # If the first item isn't a string, assume obj is an iterable corpus.
+ return False, obj
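A brief illustration of this helper's contract (`_is_single` is private, so this is for understanding the code rather than public use):

>>> from gensim.models.phrases import _is_single
>>> _is_single(['graph', 'minors'])[0]  # a list of string tokens => a single document
True
>>> _is_single([['graph', 'minors']])[0]  # a list of token lists => a corpus
False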
-class SentenceAnalyzer:
- """Base util class for :class:`~gensim.models.phrases.Phrases` and :class:`~gensim.models.phrases.Phraser`."""
- def score_item(self, worda, wordb, components, scorer):
- """Get bi-gram score statistics.
+class _PhrasesTransformation(interfaces.TransformationABC):
+ """
+ Abstract base class for :class:`~gensim.models.phrases.Phrases` and
+ :class:`~gensim.models.phrases.FrozenPhrases`.
- Parameters
- ----------
- worda : str
- First word of bi-gram.
- wordb : str
- Second word of bi-gram.
- components : generator
- Contain all phrases.
- scorer : function
- Scorer function, as given to :class:`~gensim.models.phrases.Phrases`.
- See :func:`~gensim.models.phrases.npmi_scorer` and :func:`~gensim.models.phrases.original_scorer`.
+ """
+ def __init__(self, common_terms):
+ self.common_terms = frozenset(common_terms)
+
+ def score_candidate(self, word_a, word_b, in_between):
+ """Score a single phrase candidate.
Returns
-------
- float
- Score for given bi-gram. If bi-gram not present in dictionary - return -1.
-
+ (str, float)
+ 2-tuple of ``(delimiter-joined phrase, phrase score)`` for a phrase,
+ or ``(None, None)`` if not a phrase.
"""
- vocab = self.vocab
- if worda in vocab and wordb in vocab:
- bigram = self.delimiter.join(components)
- if bigram in vocab:
- return scorer(
- worda_count=float(vocab[worda]),
- wordb_count=float(vocab[wordb]),
- bigram_count=float(vocab[bigram]))
- return -1
-
- def analyze_sentence(self, sentence, threshold, common_terms, scorer):
- """Analyze a sentence, detecting any bigrams that should be concatenated.
+ raise NotImplementedError("ABC: override this method in child classes")
+
+ def analyze_sentence(self, sentence):
+ """Analyze a sentence, concatenating any detected phrases into a single token.
Parameters
----------
sentence : iterable of str
Token sequence representing the sentence to be analyzed.
- threshold : float
- The minimum score for a bigram to be taken into account.
- common_terms : list of object
- List of common terms, they receive special treatment.
- scorer : function
- Scorer function, as given to :class:`~gensim.models.phrases.Phrases`.
- See :func:`~gensim.models.phrases.npmi_scorer` and :func:`~gensim.models.phrases.original_scorer`.
Yields
------
(str, score)
- If bi-gram detected, a tuple where the first element is a detect bigram, second its score.
- Otherwise, the first tuple element is a single word and second is None.
+ Iterate through the input sentence tokens and yield 2-tuples of:
+ - ``(concatenated_phrase_tokens, score)`` for token sequences that form a phrase.
+ - ``(word, None)`` if the token is not a part of a phrase.
"""
- s = [utils.any2utf8(w) for w in sentence]
- # adding None is a trick that helps getting an automatic happy ending
- # as it won't be a common_word, nor score
- s.append(None)
- last_uncommon = None
- in_between = []
- for word in s:
- is_common = word in common_terms
- if not is_common and last_uncommon:
- chain = [last_uncommon] + in_between + [word]
- # test between last_uncommon
- score = self.score_item(
- worda=last_uncommon,
- wordb=word,
- components=chain,
- scorer=scorer,
- )
- if score > threshold:
- yield (chain, score)
- last_uncommon = None
- in_between = []
+ start_token, in_between = None, []
+ for word in sentence:
+ if word not in self.common_terms:
+ # The current word is a normal token, not a stop word, which means it's a potential
+ # beginning (or end) of a phrase.
+ if start_token:
+ # We're inside a potential phrase, of which this word is the end.
+ phrase, score = self.score_candidate(start_token, word, in_between)
+ if score is not None:
+ # Phrase detected!
+ yield phrase, score
+ start_token, in_between = None, []
+ else:
+ # Not a phrase after all. Dissolve the candidate's constituent tokens as individual words.
+ yield start_token, None
+ for w in in_between:
+ yield w, None
+ start_token, in_between = word, [] # new potential phrase starts here
else:
- # release words individually
- for w in itertools.chain([last_uncommon], in_between):
- yield (w, None)
- in_between = []
- last_uncommon = word
- elif not is_common:
- last_uncommon = word
- else: # common term
- if last_uncommon:
- # wait for uncommon resolution
+ # Not inside a potential bigram yet; start a new potential bigram here.
+ start_token, in_between = word, []
+ else: # We're a stop word.
+ if start_token:
+ # We're inside a potential bigram: add the stopword and keep growing the phrase.
in_between.append(word)
else:
- yield (word, None)
+ # Not inside a bigram: emit the stopword and move on. Phrases never begin with a stopword.
+ yield word, None
+ # Emit any non-phrase tokens at the end.
+ if start_token:
+ yield start_token, None
+ for w in in_between:
+ yield w, None
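To see the ``(token, score)`` stream this yields, a hedged sketch; the exact scores depend on the counts collected from your training corpus:

>>> from gensim.test.utils import datapath
>>> from gensim.models.word2vec import Text8Corpus
>>> from gensim.models.phrases import Phrases
>>>
>>> phrases = Phrases(Text8Corpus(datapath('testcorpus.txt')), min_count=1, threshold=1)
>>> pairs = list(phrases.analyze_sentence(['trees', 'graph', 'minors']))
>>> # Typically something like [('trees_graph', 2.0...), ('minors', None)]:
>>> # detected phrases carry their score, plain words carry None.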
+ def __getitem__(self, sentence):
+ """Convert the input sequence of tokens `sentence` into a sequence of tokens where adjacent
+ tokens are replaced by a single token if they form a bigram collocation.
-class PhrasesTransformation(interfaces.TransformationABC):
- """Base util class for :class:`~gensim.models.phrases.Phrases` and :class:`~gensim.models.phrases.Phraser`."""
+ If `sentence` is an entire corpus (iterable of sentences rather than a single
+ sentence), return an iterable that converts each of the corpus' sentences
+ into phrases on the fly, one after another.
+
+ Parameters
+ ----------
+ sentence : {list of str, iterable of list of str}
+ Input sentence or a stream of sentences.
+
+ Return
+ ------
+ {list of str, iterable of list of str}
+ Sentence with phrase tokens joined by `self.delimiter` character, if input was a single sentence.
+ A generator of such joined sentences if input was a corpus.
+
+ """
+ is_single, sentence = _is_single(sentence)
+ if not is_single:
+ # If the input is an entire corpus (rather than a single sentence),
+ # return an iterable stream.
+ return self._apply(sentence)
+
+ return [token for token, _ in self.analyze_sentence(sentence)]
+
+ def export_phrases(self, sentences):
+ """Get all unique phrases (multi-word expressions) that appear in ``sentences``, and their scores.
+
+ Parameters
+ ----------
+ sentences : iterable of list of str
+ Text corpus.
+
+ Returns
+ -------
+ dict(str, float)
+ Unique phrases mapped to their scores.
+
+ Example
+ -------
+ .. sourcecode:: pycon
+
+ >>> from gensim.test.utils import datapath
+ >>> from gensim.models.word2vec import Text8Corpus
+ >>> from gensim.models.phrases import Phrases
+ >>>
+ >>> sentences = Text8Corpus(datapath('testcorpus.txt'))
+ >>> phrases = Phrases(sentences, min_count=1, threshold=0.1)
+ >>>
+ >>> for phrase, score in phrases.export_phrases(sentences).items():
+ ... print(phrase, score)
+ """
+ result = {}
+ for sentence in sentences:
+ for phrase, score in self.analyze_sentence(sentence):
+ if score is not None:
+ result[phrase] = score
+ return result
@classmethod
def load(cls, *args, **kwargs):
"""Load a previously saved :class:`~gensim.models.phrases.Phrases` /
- :class:`~gensim.models.phrases.Phraser` class. Handles backwards compatibility from older
- :class:`~gensim.models.phrases.Phrases` / :class:`~gensim.models.phrases.Phraser`
- versions which did not support pluggable scoring functions.
+ :class:`~gensim.models.phrases.FrozenPhrases` model.
+
+ Handles backwards compatibility from older versions which did not support pluggable scoring functions.
Parameters
----------
args : object
- Sequence of arguments, see :class:`~gensim.utils.SaveLoad.load` for more information.
+ See :class:`~gensim.utils.SaveLoad.load`.
kwargs : object
- Sequence of arguments, see :class:`~gensim.utils.SaveLoad.load` for more information.
+ See :class:`~gensim.utils.SaveLoad.load`.
"""
- model = super(PhrasesTransformation, cls).load(*args, **kwargs)
- # update older models
- # if value in phrasegrams dict is a tuple, load only the scores.
+ model = super(_PhrasesTransformation, cls).load(*args, **kwargs)
- for component, score in getattr(model, "phrasegrams", {}).items():
+ # Upgrade FrozenPhrases
+ try:
+ phrasegrams = getattr(model, "phrasegrams", {})
+ component, score = next(iter(phrasegrams.items()))
if isinstance(score, tuple):
- frequency, score_val = score
- model.phrasegrams[component] = score_val
-
- # if no scoring parameter, use default scoring
+ # Value in phrasegrams used to be a tuple; keep only the 2nd tuple component = score.
+ model.phrasegrams = {
+ str(model.delimiter.join(key), encoding='utf8'): val[1]
+ for key, val in phrasegrams.items()
+ }
+ elif isinstance(component, tuple): # 3.8 => 4.0: phrasegram keys are strings, not tuples with bytestrings
+ model.phrasegrams = {
+ str(model.delimiter.join(key), encoding='utf8'): val
+ for key, val in phrasegrams.items()
+ }
+ except StopIteration:
+ # no phrasegrams, nothing to upgrade
+ pass
+
+ # If no scoring parameter, use default scoring.
if not hasattr(model, 'scoring'):
- logger.info('older version of %s loaded without scoring function', cls.__name__)
- logger.info('setting pluggable scoring method to original_scorer for compatibility')
+ logger.warning('older version of %s loaded without scoring function', cls.__name__)
+ logger.warning('setting pluggable scoring method to original_scorer for compatibility')
model.scoring = original_scorer
- # if there is a scoring parameter, and it's a text value, load the proper scoring function
+ # If there is a scoring parameter, and it's a text value, load the proper scoring function.
if hasattr(model, 'scoring'):
if isinstance(model.scoring, str):
if model.scoring == 'default':
- logger.info('older version of %s loaded with "default" scoring parameter', cls.__name__)
- logger.info('setting scoring method to original_scorer pluggable scoring method for compatibility')
+ logger.warning('older version of %s loaded with "default" scoring parameter', cls.__name__)
+ logger.warning('setting scoring method to original_scorer for compatibility')
model.scoring = original_scorer
elif model.scoring == 'npmi':
- logger.info('older version of %s loaded with "npmi" scoring parameter', cls.__name__)
- logger.info('setting scoring method to npmi_scorer pluggable scoring method for compatibility')
+ logger.warning('older version of %s loaded with "npmi" scoring parameter', cls.__name__)
+ logger.warning('setting scoring method to npmi_scorer for compatibility')
model.scoring = npmi_scorer
else:
- raise ValueError(
- 'failed to load %s model with unknown scoring setting %s' % (cls.__name__, model.scoring))
- # if there is no common_terms attribute, initialize
+ raise ValueError(f'failed to load {cls.__name__} model, unknown scoring "{model.scoring}"')
+ # Initialize new attributes to default values.
if not hasattr(model, "common_terms"):
- logger.info('older version of %s loaded without common_terms attribute', cls.__name__)
- logger.info('setting common_terms to empty set')
+ logger.warning(
+ 'older version of %s loaded without common_terms attribute, setting it to empty set',
+ cls.__name__,
+ )
model.common_terms = frozenset()
- return model
-
-
-def _sentence2token(phrase_class, sentence):
- """ Convert the input tokens `sentence` into tokens where detected bigrams are joined by a selected delimiter.
- This function is used by: meth:`~gensim.models.phrases.Phrases.__getitem__` and
- meth:`~gensim.models.phrases.Phraser.__getitem__`
-
- Parameters
- ----------
- phrase_class :
- class:`~gensim.models.phrases.Phrases` or :class:`~gensim.models.phrases.Phraser`
- sentence : {list of str, iterable of list of str}
- Sentence or text corpus.
-
- Returns
- -------
- {list of str, :class:`~gensim.interfaces.TransformedCorpus`}
- `sentence` with detected phrase bigrams merged together, or a streamed corpus of such sentences
- if the input was a corpus.
-
- """
- is_single, sentence = _is_single(sentence)
- if not is_single:
- # if the input is an entire corpus (rather than a single sentence),
- # return an iterable stream.
- return phrase_class._apply(sentence)
-
- delimiter = phrase_class.delimiter
- if hasattr(phrase_class, 'vocab'):
- scorer = functools.partial(
- phrase_class.scoring,
- len_vocab=float(len(phrase_class.vocab)),
- min_count=float(phrase_class.min_count),
- corpus_word_count=float(phrase_class.corpus_word_count))
- else:
- scorer = None
- bigrams = phrase_class.analyze_sentence(sentence, threshold=phrase_class.threshold,
- common_terms=phrase_class.common_terms, scorer=scorer)
+ if not hasattr(model, 'corpus_word_count'):
+ logger.warning('older version of %s loaded without corpus_word_count', cls.__name__)
+ logger.warning('setting corpus_word_count to 0, do not use it in your scoring function')
+ model.corpus_word_count = 0
- new_s = []
- for words, score in bigrams:
- if score is not None:
- words = delimiter.join(words)
- new_s.append(words)
- return [utils.to_unicode(w) for w in new_s]
+ # Before 4.0.0, we stored strings as UTF8 bytes internally, to save RAM. Since 4.0.0, we use strings.
+ if getattr(model, 'vocab', None):
+ word = next(iter(model.vocab)) # get a random key – any key will do
+ if not isinstance(word, str):
+ logger.info("old version of %s loaded, upgrading %i words in memory", cls.__name__, len(model.vocab))
+ logger.info("re-save the loaded model to avoid this upgrade in the future")
+ vocab = defaultdict(int)
+ for key, value in model.vocab.items(): # needs lots of extra RAM temporarily!
+ vocab[str(key, encoding='utf8')] = value
+ model.vocab = vocab
+ if not isinstance(model.delimiter, str):
+ model.delimiter = str(model.delimiter, encoding='utf8')
+ return model
-class Phrases(SentenceAnalyzer, PhrasesTransformation):
+class Phrases(_PhrasesTransformation):
"""Detect phrases based on collocation counts."""
def __init__(
self, sentences=None, min_count=5, threshold=10.0,
- max_vocab_size=40000000, delimiter=b'_', progress_per=10000,
+ max_vocab_size=40000000, delimiter='_', progress_per=10000,
scoring='default', common_terms=frozenset(),
):
"""
@@ -330,7 +437,7 @@ def __init__(
to keep memory under control. The default of 40M needs about 3.6GB of RAM. Increase/decrease
`max_vocab_size` depending on how much available memory you have.
delimiter : str, optional
- Glue character used to join collocation tokens, should be a byte string (e.g. b'_').
+ Glue character used to join collocation tokens.
scoring : {'default', 'npmi', function}, optional
Specify how potential phrases are scored. `scoring` can be set with either a string that refers to a
built-in scoring function, or with a function with the expected parameter names.
@@ -359,9 +466,40 @@ def __init__(
* corpus_word_count - the total number of tokens (non-unique) in `sentences`
The scoring function **must accept all these parameters**, even if it doesn't use them in its scoring.
+
The scoring function **must be pickleable**.
+ Examples
+ ----------
+ .. sourcecode:: pycon
+
+ >>> from gensim.test.utils import datapath
+ >>> from gensim.models.word2vec import Text8Corpus
+ >>> from gensim.models.phrases import Phrases
+ >>>
+ >>> # Load corpus and train a model.
+ >>> sentences = Text8Corpus(datapath('testcorpus.txt'))
+ >>> phrases = Phrases(sentences, min_count=1, threshold=1)
+ >>>
+ >>> # Use the model to detect phrases in a new sentence.
+ >>> sent = [u'trees', u'graph', u'minors']
+ >>> print(phrases[sent])
+ [u'trees_graph', u'minors']
+ >>>
+ >>> # Or transform multiple sentences at once.
+ >>> sents = [[u'trees', u'graph', u'minors'], [u'graph', u'minors']]
+ >>> for phrase in phrases[sents]:
+ ... print(phrase)
+ [u'trees_graph', u'minors']
+ [u'graph_minors']
+ >>>
+ >>> # Export a FrozenPhrases object that is more efficient but doesn't allow any more training.
+ >>> frozen_phrases = phrases.freeze()
+ >>> print(frozen_phrases[sent])
+ [u'trees_graph', u'minors']
+
"""
+ super().__init__(common_terms=common_terms)
if min_count <= 0:
raise ValueError("min_count should be at least 1")
@@ -370,10 +508,9 @@ def __init__(
if scoring == 'npmi' and (threshold < -1 or threshold > 1):
raise ValueError("threshold should be between -1 and 1 for npmi scoring")
- # set scoring based on string
- # intentially override the value of the scoring parameter rather than set self.scoring here,
- # to still run the check of scoring function parameters in the next code block
-
+ # Set scoring based on string.
+ # Intentionally override the value of the scoring parameter rather than set self.scoring here,
+ # to still run the check of scoring function parameters in the next code block.
if isinstance(scoring, str):
if scoring == 'default':
scoring = original_scorer
@@ -382,65 +519,45 @@ def __init__(
else:
raise ValueError(f'unknown scoring method string {scoring} specified')
- scoring_parameters = [
+ scoring_params = [
'worda_count', 'wordb_count', 'bigram_count', 'len_vocab', 'min_count', 'corpus_word_count',
]
if callable(scoring):
- if all(parameter in getargspec(scoring)[0] for parameter in scoring_parameters):
+ missing = [param for param in scoring_params if param not in getargspec(scoring)[0]]
+ if not missing:
self.scoring = scoring
else:
- raise ValueError('scoring function missing expected parameters')
+ raise ValueError(f'scoring function missing expected parameters {missing}')
self.min_count = min_count
self.threshold = threshold
self.max_vocab_size = max_vocab_size
- self.vocab = defaultdict(int) # mapping between utf8 token => its count
+ self.vocab = defaultdict(int) # mapping between token => its count
self.min_reduce = 1 # ignore any tokens with count smaller than this
self.delimiter = delimiter
self.progress_per = progress_per
self.corpus_word_count = 0
- self.common_terms = frozenset(utils.any2utf8(w) for w in common_terms)
- # ensure picklability of custom scorer
+ # Ensure picklability of the scorer.
try:
pickle.loads(pickle.dumps(self.scoring))
except pickle.PickleError:
- raise pickle.PickleError('Custom Phrases scoring function must be pickle-able')
+ raise pickle.PickleError(f'Custom scoring function in {self.__class__.__name__} must be pickle-able')
if sentences is not None:
self.add_vocab(sentences)
- @classmethod
- def load(cls, *args, **kwargs):
- """Load a previously saved Phrases class.
- Handles backwards compatibility from older Phrases versions which did not support pluggable scoring functions.
-
- Parameters
- ----------
- args : object
- Sequence of arguments, see :class:`~gensim.utils.SaveLoad.load` for more information.
- kwargs : object
- Sequence of arguments, see :class:`~gensim.utils.SaveLoad.load` for more information.
-
- """
- model = super(Phrases, cls).load(*args, **kwargs)
- if not hasattr(model, 'corpus_word_count'):
- logger.info('older version of %s loaded without corpus_word_count', cls.__name__)
- logger.info('Setting it to 0, do not use it in your scoring function.')
- model.corpus_word_count = 0
- return model
-
def __str__(self):
- """Get short string representation of this phrase detector."""
return "%s<%i vocab, min_count=%s, threshold=%s, max_vocab_size=%s>" % (
self.__class__.__name__, len(self.vocab), self.min_count,
self.threshold, self.max_vocab_size,
)
@staticmethod
- def learn_vocab(sentences, max_vocab_size, delimiter=b'_', progress_per=10000,
- common_terms=frozenset()):
- """Collect unigram/bigram counts from the `sentences` iterable.
+ def _learn_vocab(
+ sentences, max_vocab_size, delimiter='_', common_terms=frozenset(), progress_per=10000,
+ ):
+ """Collect unigram and bigram counts from the `sentences` iterable.
Parameters
----------
@@ -451,62 +568,40 @@ def learn_vocab(sentences, max_vocab_size, delimiter=b'_', progress_per=10000,
for such examples.
max_vocab_size : int
Maximum size (number of tokens) of the vocabulary. Used to control pruning of less common words,
- to keep memory under control. The default of 40M needs about 3.6GB of RAM. Increase/decrease
+ to keep memory under control. 40M needs about 3.6GB of RAM. Increase/decrease
`max_vocab_size` depending on how much available memory you have.
delimiter : str, optional
- Glue character used to join collocation tokens, should be a byte string (e.g. b'_').
- progress_per : int
- Write logs every `progress_per` sentence.
+ Glue character used to join collocation tokens.
common_terms : set of str, optional
- List of "stop words" that won't affect frequency count of expressions containing them.
- Allow to detect expressions like "bank_of_america" or "eye_of_the_beholder".
+ List of "stop words" that won't affect frequency count of phrases containing them.
+ Allow to detect phrases like "bank_of_america" or "eye_of_the_beholder".
+ progress_per : int
+ Log progress once every `progress_per` sentences.
Return
------
(int, dict of (str, int), int)
- Number of pruned words, counters for each word/bi-gram and total number of words.
-
- Example
- ----------
- .. sourcecode:: pycon
-
- >>> from gensim.test.utils import datapath
- >>> from gensim.models.word2vec import Text8Corpus
- >>> from gensim.models.phrases import Phrases
- >>>
- >>> sentences = Text8Corpus(datapath('testcorpus.txt'))
- >>> pruned_words, counters, total_words = Phrases.learn_vocab(sentences, 100)
- >>> (pruned_words, total_words)
- (1, 29)
- >>> counters['computer']
- 2
- >>> counters['response_time']
- 1
+ Number of pruned words, counters for each word/bi-gram, and total number of words.
"""
- sentence_no = -1
- total_words = 0
- logger.info("collecting all words and their counts")
+ sentence_no, total_words, min_reduce = -1, 0, 1
vocab = defaultdict(int)
- min_reduce = 1
+ logger.info("collecting all words and their counts")
for sentence_no, sentence in enumerate(sentences):
if sentence_no % progress_per == 0:
logger.info(
"PROGRESS: at sentence #%i, processed %i words and %i word types",
sentence_no, total_words, len(vocab),
)
- s = [utils.any2utf8(w) for w in sentence]
- last_uncommon = None
- in_between = []
- for word in s:
+ start_token, in_between = None, []
+ for word in sentence:
if word not in common_terms:
vocab[word] += 1
- if last_uncommon is not None:
- components = itertools.chain([last_uncommon], in_between, [word])
- vocab[delimiter.join(components)] += 1
- last_uncommon = word
- in_between = []
- elif last_uncommon is not None:
+ if start_token is not None:
+ phrase_tokens = itertools.chain([start_token], in_between, [word])
+ vocab[delimiter.join(phrase_tokens)] += 1
+ start_token, in_between = word, [] # treat word as both end of a phrase AND beginning of another
+ elif start_token is not None:
in_between.append(word)
total_words += 1
@@ -515,13 +610,13 @@ def learn_vocab(sentences, max_vocab_size, delimiter=b'_', progress_per=10000,
min_reduce += 1
logger.info(
- "collected %i word types from a corpus of %i words (unigram + bigrams) and %i sentences",
- len(vocab), total_words, sentence_no + 1
+ "collected %i token types (unigram + bigrams) from a corpus of %i words and %i sentences",
+ len(vocab), total_words, sentence_no + 1,
)
return min_reduce, vocab, total_words
def add_vocab(self, sentences):
- """Update model with new `sentences`.
+ """Update model parameters with new `sentences`.
Parameters
----------
@@ -535,7 +630,8 @@ def add_vocab(self, sentences):
>>> from gensim.test.utils import datapath
>>> from gensim.models.word2vec import Text8Corpus
>>> from gensim.models.phrases import Phrases
- >>> # Create corpus and use it for phrase detector
+ >>>
+ >>> # Train a phrase detector from a text corpus.
>>> sentences = Text8Corpus(datapath('testcorpus.txt'))
>>> phrases = Phrases(sentences) # train model
>>> assert len(phrases.vocab) == 37
@@ -549,16 +645,18 @@ def add_vocab(self, sentences):
>>> assert len(phrases.vocab) == 60
"""
- # uses a separate vocab to collect the token counts from `sentences`.
- # this consumes more RAM than merging new sentences into `self.vocab`
+ # Uses a separate vocab to collect the token counts from `sentences`.
+ # This consumes more RAM than merging new sentences into `self.vocab`
# directly, but gives the new sentences a fighting chance to collect
# sufficient counts, before being pruned out by the (large) accumulated
# counts collected in previous learn_vocab runs.
- min_reduce, vocab, total_words = self.learn_vocab(
- sentences, self.max_vocab_size, self.delimiter, self.progress_per, self.common_terms)
+ min_reduce, vocab, total_words = self._learn_vocab(
+ sentences, max_vocab_size=self.max_vocab_size, delimiter=self.delimiter,
+ progress_per=self.progress_per, common_terms=self.common_terms,
+ )
self.corpus_word_count += total_words
- if len(self.vocab) > 0:
+ if self.vocab:
logger.info("merging %i counts into %s", len(vocab), self)
self.min_reduce = max(self.min_reduce, min_reduce)
for word, count in vocab.items():
@@ -566,225 +664,59 @@ def add_vocab(self, sentences):
if len(self.vocab) > self.max_vocab_size:
utils.prune_vocab(self.vocab, self.min_reduce)
self.min_reduce += 1
- logger.info("merged %s", self)
else:
- # in common case, avoid doubling gigantic dict
- logger.info("using %i counts as vocab in %s", len(vocab), self)
+ # Optimization for a common case: the current vocab is empty, so apply
+ # the new vocab directly, no need to double it in memory.
self.vocab = vocab
-
- def export_phrases(self, sentences, out_delimiter=b' ', as_tuples=False):
- """Get all phrases that appear in 'sentences' that pass the bigram threshold.
-
- Parameters
- ----------
- sentences : iterable of list of str
- Text corpus.
- out_delimiter : str, optional
- Delimiter used to "glue" together words that form a bigram phrase.
- as_tuples : bool, optional
- Yield `(tuple(words), score)` instead of `(out_delimiter.join(words), score)`?
-
- Yields
- ------
- ((str, str), float) **or** (str, float)
- Phrases detected in `sentences`. Return type depends on the `as_tuples` parameter.
-
- Example
- -------
- .. sourcecode:: pycon
-
- >>> from gensim.test.utils import datapath
- >>> from gensim.models.word2vec import Text8Corpus
- >>> from gensim.models.phrases import Phrases
- >>>
- >>> sentences = Text8Corpus(datapath('testcorpus.txt'))
- >>> phrases = Phrases(sentences, min_count=1, threshold=0.1)
- >>>
- >>> for phrase, score in phrases.export_phrases(sentences):
- ... pass
-
- """
- analyze_sentence = functools.partial(
- self.analyze_sentence,
- threshold=self.threshold,
- common_terms=self.common_terms,
- scorer=functools.partial(
- self.scoring,
- len_vocab=float(len(self.vocab)),
- min_count=float(self.min_count),
- corpus_word_count=float(self.corpus_word_count),
- ),
+ logger.info("merged %s", self)
+
+ def score_candidate(self, word_a, word_b, in_between):
+ # Micro optimization: check for quick early-out conditions, before the actual scoring.
+ word_a_cnt = self.vocab[word_a]
+ if word_a_cnt <= 0:
+ return None, None
+
+ word_b_cnt = self.vocab[word_b]
+ if word_b_cnt <= 0:
+ return None, None
+
+ phrase = self.delimiter.join([word_a] + in_between + [word_b])
+ # XXX: Why do we care about *all* phrase tokens? Why not just score the start+end bigram?
+ phrase_cnt = self.vocab[phrase]
+ if phrase_cnt <= 0:
+ return None, None
+
+ score = self.scoring(
+ worda_count=word_a_cnt, wordb_count=word_b_cnt, bigram_count=phrase_cnt,
+ len_vocab=len(self.vocab), min_count=self.min_count, corpus_word_count=self.corpus_word_count,
)
- for sentence in sentences:
- bigrams = analyze_sentence(sentence)
- # keeps only not None scores
- filtered = ((words, score) for words, score in bigrams if score is not None)
- for words, score in filtered:
- if as_tuples:
- yield (tuple(words), score)
- else:
- yield (out_delimiter.join(words), score)
+ if score <= self.threshold:
+ return None, None
- def __getitem__(self, sentence):
- """Convert the input tokens `sentence` into tokens where detected bigrams are joined by a selected delimiter.
+ return phrase, score
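A consequence of this final check is that raising `threshold` can only shrink the set of detected phrases, never grow it. A small sketch using the bundled test corpus:

>>> from gensim.test.utils import datapath
>>> from gensim.models.word2vec import Text8Corpus
>>> from gensim.models.phrases import Phrases
>>>
>>> sentences = Text8Corpus(datapath('testcorpus.txt'))
>>> lenient = Phrases(sentences, min_count=1, threshold=1)
>>> strict = Phrases(sentences, min_count=1, threshold=100)
>>> len(lenient.export_phrases(sentences)) >= len(strict.export_phrases(sentences))
True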
- If `sentence` is an entire corpus (iterable of sentences rather than a single
- sentence), return an iterable that converts each of the corpus' sentences
- into phrases on the fly, one after another.
+ def freeze(self):
+ """
+ Return an object that contains the bare minimum of information while still allowing
+ phrase detection. See :class:`~gensim.models.phrases.FrozenPhrases`.
- Parameters
- ----------
- sentence : {list of str, iterable of list of str}
- Sentence or text corpus.
+ Use this "frozen model" to dramatically reduce RAM footprint if you don't plan to
+ make any further changes to your `Phrases` model.
Returns
-------
- {list of str, :class:`gensim.interfaces.TransformedCorpus`}
- `sentence` with detected phrase bigrams merged together, or a streamed corpus of such sentences
- if the input was a corpus.
-
- Examples
- ----------
- .. sourcecode:: pycon
-
- >>> from gensim.test.utils import datapath
- >>> from gensim.models.word2vec import Text8Corpus
- >>> from gensim.models.phrases import Phrases, Phraser
- >>>
- >>> # Create corpus
- >>> sentences = Text8Corpus(datapath('testcorpus.txt'))
- >>>
- >>> # Train the detector with:
- >>> phrases = Phrases(sentences, min_count=1, threshold=1)
- >>> # Input is a list of unicode strings:
- >>> sent = [u'trees', u'graph', u'minors']
- >>> # Both of these tokens appear in corpus at least twice, and phrase score is higher, than treshold = 1:
- >>> print(phrases[sent])
- [u'trees_graph', u'minors']
- >>>
- >>> sentences = Text8Corpus(datapath('testcorpus.txt'))
- >>> phrases = Phrases(sentences, min_count=1, threshold=1)
- >>> phraser = Phraser(phrases) # for speedup
- >>>
- >>> sent = [[u'trees', u'graph', u'minors'], [u'graph', u'minors']]
- >>> for phrase in phraser[sent]:
- ... pass
+ :class:`~gensim.models.phrases.FrozenPhrases`
+ Exported object that's smaller, faster, but doesn't support model updates.
"""
- return _sentence2token(self, sentence)
-
-
-def original_scorer(worda_count, wordb_count, bigram_count, len_vocab, min_count, corpus_word_count):
- r"""Bigram scoring function, based on the original `Mikolov, et. al: "Distributed Representations
- of Words and Phrases and their Compositionality" `_.
+ return FrozenPhrases(self)
- Parameters
- ----------
- worda_count : int
- Number of occurrences for first word.
- wordb_count : int
- Number of occurrences for second word.
- bigram_count : int
- Number of co-occurrences for phrase "worda_wordb".
- len_vocab : int
- Size of vocabulary.
- min_count: int
- Minimum collocation count threshold.
- corpus_word_count : int
- Not used in this particular scoring technique.
-
- Returns
- -------
- float
- Score for given bi-gram, greater than or equal to 0.
-
- Notes
- -----
- Formula: :math:`\frac{(bigram\_count - min\_count) * len\_vocab }{ (worda\_count * wordb\_count)}`.
- """
- return (bigram_count - min_count) / worda_count / wordb_count * len_vocab
-
-
-def npmi_scorer(worda_count, wordb_count, bigram_count, len_vocab, min_count, corpus_word_count):
- r"""Calculation NPMI score based on `"Normalized (Pointwise) Mutual Information in Colocation Extraction"
- by Gerlof Bouma `_.
-
- Parameters
- ----------
- worda_count : int
- Number of occurrences for first word.
- wordb_count : int
- Number of occurrences for second word.
- bigram_count : int
- Number of co-occurrences for phrase "worda_wordb".
- len_vocab : int
- Not used.
- min_count: int
- Ignore all bigrams with total collected count lower than this value.
- corpus_word_count : int
- Total number of words in the corpus.
-
- Returns
- -------
- float
- Score for given bi-gram, in the range -1 to 1.
-
- Notes
- -----
- Formula: :math:`\frac{ln(prop(word_a, word_b) / (prop(word_a)*prop(word_b)))}{ -ln(prop(word_a, word_b)}`,
- where :math:`prob(word) = \frac{word\_count}{corpus\_word\_count}`
-
- """
- if bigram_count >= min_count:
- pa = worda_count / corpus_word_count
- pb = wordb_count / corpus_word_count
- pab = bigram_count / corpus_word_count
- return log(pab / (pa * pb)) / -log(pab)
- else:
- # Return -infinity to make sure that no phrases will be created
- # from bigrams less frequent than min_count
- return float('-inf')
-
-
-def pseudocorpus(source_vocab, sep, common_terms=frozenset()):
- """Feeds `source_vocab`'s compound keys back to it, to discover phrases.
-
- Parameters
- ----------
- source_vocab : iterable of list of str
- Tokens vocabulary.
- sep : str
- Separator element.
- common_terms : set, optional
- Immutable set of stopwords.
-
- Yields
- ------
- list of str
- Phrase.
-
- """
- for k in source_vocab:
- if sep not in k:
- continue
- unigrams = k.split(sep)
- for i in range(1, len(unigrams)):
- if unigrams[i - 1] not in common_terms:
- # do not join common terms
- cterms = list(itertools.takewhile(lambda w: w in common_terms, unigrams[i:]))
- tail = unigrams[i + len(cterms):]
- components = [sep.join(unigrams[:i])] + cterms
- if tail:
- components.append(sep.join(tail))
- yield components
-
-
-class Phraser(SentenceAnalyzer, PhrasesTransformation):
+class FrozenPhrases(_PhrasesTransformation):
"""Minimal state & functionality exported from :class:`~gensim.models.phrases.Phrases`.
The goal of this class is to cut down memory consumption of `Phrases`, by discarding model state
- not strictly needed for the bigram detection task.
+ not strictly needed for the phrase detection task.
Use this instead of `Phrases` if you do not need to update the bigram statistics with new documents any more.
@@ -796,27 +728,28 @@ def __init__(self, phrases_model):
Parameters
----------
phrases_model : :class:`~gensim.models.phrases.Phrases`
- Trained phrases instance.
+ Trained phrases instance, to extract all phrases from.
Notes
-----
- After the one-time initialization, a :class:`~gensim.models.phrases.Phraser` will be much smaller and somewhat
- faster than using the full :class:`~gensim.models.phrases.Phrases` model.
+ After the one-time initialization, a :class:`~gensim.models.phrases.FrozenPhrases` will be much
+ smaller and faster than using the full :class:`~gensim.models.phrases.Phrases` model.
Examples
- --------
+ ----------
.. sourcecode:: pycon
>>> from gensim.test.utils import datapath
>>> from gensim.models.word2vec import Text8Corpus
- >>> from gensim.models.phrases import Phrases, Phraser
+ >>> from gensim.models.phrases import Phrases
>>>
+ >>> # Load corpus and train a model.
>>> sentences = Text8Corpus(datapath('testcorpus.txt'))
>>> phrases = Phrases(sentences, min_count=1, threshold=1)
>>>
- >>> bigram = Phraser(phrases)
- >>> sent = [u'trees', u'graph', u'minors']
- >>> print(bigram[sent])
+ >>> # Export a FrozenPhrases object that is more efficient but doesn't allow further training.
+ >>> frozen_phrases = phrases.freeze()
+ >>> sent = [u'trees', u'graph', u'minors']
+ >>> print(frozen_phrases[sent])
[u'trees_graph', u'minors']
"""
@@ -825,99 +758,43 @@ def __init__(self, phrases_model):
self.delimiter = phrases_model.delimiter
self.scoring = phrases_model.scoring
self.common_terms = phrases_model.common_terms
- corpus = self.pseudocorpus(phrases_model)
- self.phrasegrams = {}
- logger.info('source_vocab length %i', len(phrases_model.vocab))
- count = 0
- for bigram, score in phrases_model.export_phrases(corpus, self.delimiter, as_tuples=True):
- if bigram in self.phrasegrams:
- logger.info('Phraser repeat %s', bigram)
- self.phrasegrams[bigram] = score
- count += 1
- if not count % 50000:
- logger.info('Phraser added %i phrasegrams', count)
- logger.info('Phraser built with %i phrasegrams', len(self.phrasegrams))
-
- def pseudocorpus(self, phrases_model):
- """Alias for :func:`gensim.models.phrases.pseudocorpus`.
-
- Parameters
- ----------
- phrases_model : :class:`~gensim.models.phrases.Phrases`
- Phrases instance.
-
- Return
- ------
- generator
- Generator with phrases.
-
- """
- return pseudocorpus(phrases_model.vocab, phrases_model.delimiter, phrases_model.common_terms)
+ logger.info('exporting phrases from %s', phrases_model)
+ self.phrasegrams = self._import_phrases(phrases_model)
+ logger.info('exported %s', self)
- def score_item(self, worda, wordb, components, scorer):
- """Score a bigram.
+ def __str__(self):
+ return "%s<%i phrases, min_count=%s, threshold=%s>" % (
+ self.__class__.__name__, len(self.phrasegrams), self.min_count, self.threshold,
+ )
- Parameters
- ----------
- worda : str
- First word for comparison.
- wordb : str
- Second word for comparison.
- components : generator
- Contain phrases.
- scorer : {'default', 'npmi'}
- NOT USED.
+ def _import_phrases(self, phrases_model):
+ """Extract all phrases that pass the threshold out of `phrases_model`.
Returns
- -------
- float
- Score for given bi-gram, if bi-gram not presented in dictionary - return -1.
+ -------
+ dict[str, float]
+ Mapping between phrases and their scores.
"""
- try:
- return self.phrasegrams[tuple(components)]
- except KeyError:
- return -1
-
- def __getitem__(self, sentence):
- """Convert the input sequence of tokens `sentence` into a sequence of tokens where adjacent
- tokens are replaced by a single token if they form a bigram collocation.
+ result, source_vocab = {}, phrases_model.vocab
+ for token in source_vocab:
+ unigrams = token.split(self.delimiter)
+ if len(unigrams) < 2:
+ continue # no phrases here
+ phrase, score = phrases_model.score_candidate(unigrams[0], unigrams[-1], unigrams[1:-1])
+ if score is not None:
+ result[phrase] = score
+ return result
- Parameters
- ----------
- sentence : {list of str, iterable of list of str}
- Input sentence or a stream of sentences.
-
- Return
- ------
- {list of str, iterable of list of str}
- Sentence or sentences with phrase tokens joined by `self.delimiter` character.
+ def score_candidate(self, word_a, word_b, in_between):
+ phrase = self.delimiter.join([word_a] + in_between + [word_b])
+ score = self.phrasegrams.get(phrase, NEGATIVE_INFINITY)
+ if score > self.threshold:
+ return phrase, score
+ return None, None
- Examples
- ----------
- .. sourcecode:: pycon
- >>> from gensim.test.utils import datapath
- >>> from gensim.models.word2vec import Text8Corpus
- >>> from gensim.models.phrases import Phrases, Phraser
- >>>
- >>> sentences = Text8Corpus(datapath('testcorpus.txt')) # Read corpus
- >>>
- >>> phrases = Phrases(sentences, min_count=1, threshold=1) # Train model
- >>> # Create a Phraser object to transform any sentence and turn 2 suitable tokens into 1 phrase
- >>> phraser_model = Phraser(phrases)
- >>>
- >>> sent = [u'trees', u'graph', u'minors']
- >>> print(phraser_model[sent])
- [u'trees_graph', u'minors']
- >>> sent = [[u'trees', u'graph', u'minors'], [u'graph', u'minors']]
- >>> for phrase in phraser_model[sent]:
- ... print(phrase)
- [u'trees_graph', u'minors']
- [u'graph_minors']
-
- """
- return _sentence2token(self, sentence)
+Phraser = FrozenPhrases # alias for backward compatibility
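Thanks to this alias, existing imports keep working unchanged, and old models pickled under the `Phraser` name should unpickle through the same class:

>>> from gensim.models.phrases import Phraser, FrozenPhrases
>>> Phraser is FrozenPhrases
True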
if __name__ == '__main__':
@@ -935,7 +812,6 @@ def __getitem__(self, sentence):
from gensim.models.word2vec import Text8Corpus
sentences = Text8Corpus(infile)
- # test_doc = LineSentence('test/test_data/testcorpus.txt')
bigram = Phrases(sentences, min_count=5, threshold=100)
for s in bigram[sentences]:
- print(utils.to_utf8(u' '.join(s)))
+ print(u' '.join(s))
diff --git a/gensim/sklearn_api/phrases.py b/gensim/sklearn_api/phrases.py
index 1570acf224..4c04292473 100644
--- a/gensim/sklearn_api/phrases.py
+++ b/gensim/sklearn_api/phrases.py
@@ -27,12 +27,12 @@
>>> assert ['I', 'love', 'computer_science'] == m.fit_transform(texts)[0]
"""
-from six import string_types
+
from sklearn.base import TransformerMixin, BaseEstimator
from sklearn.exceptions import NotFittedError
from gensim import models
-from gensim.models.phrases import Phraser
+from gensim.models.phrases import FrozenPhrases
class PhrasesTransformer(TransformerMixin, BaseEstimator):
@@ -44,8 +44,10 @@ class PhrasesTransformer(TransformerMixin, BaseEstimator):
`_.
"""
- def __init__(self, min_count=5, threshold=10.0, max_vocab_size=40000000,
- delimiter=b'_', progress_per=10000, scoring='default', common_terms=frozenset()):
+ def __init__(
+ self, min_count=5, threshold=10.0, max_vocab_size=40000000,
+ delimiter='_', progress_per=10000, scoring='default', common_terms=frozenset(),
+ ):
"""
Parameters
@@ -58,7 +60,7 @@ def __init__(self, min_count=5, threshold=10.0, max_vocab_size=40000000,
Maximum size of the vocabulary. Used to control pruning of less common words, to keep memory under control.
The default of 40M needs about 3.6GB of RAM.
delimiter : str, optional
- Character used to join collocation tokens, should be a byte string (e.g. b'_').
+ Character used to join collocation tokens (e.g. '_').
progress_per : int, optional
Training will report to the logger every that many phrases are learned.
scoring : str or function, optional
@@ -127,7 +129,7 @@ def fit(self, X, y=None):
max_vocab_size=self.max_vocab_size, delimiter=self.delimiter,
progress_per=self.progress_per, scoring=self.scoring, common_terms=self.common_terms
)
- self.phraser = Phraser(self.gensim_model)
+ self.phraser = FrozenPhrases(self.gensim_model)
return self
def transform(self, docs):
@@ -152,10 +154,10 @@ def transform(self, docs):
)
if self.phraser is None:
- self.phraser = Phraser(self.gensim_model)
+ self.phraser = FrozenPhrases(self.gensim_model)
# input as python lists
- if isinstance(docs[0], string_types):
+ if isinstance(docs[0], str):
docs = [docs]
return [self.phraser[doc] for doc in docs]
@@ -186,5 +188,5 @@ def partial_fit(self, X):
)
self.gensim_model.add_vocab(X)
- self.phraser = Phraser(self.gensim_model)
+ self.phraser = FrozenPhrases(self.gensim_model)
return self
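For reference, the scikit-learn wrapper keeps the usual fit/transform flow after this change. A minimal sketch with made-up texts; with these toy counts and `threshold=1`, only "computer science" scores high enough to be joined:

>>> from gensim.sklearn_api.phrases import PhrasesTransformer
>>>
>>> texts = [
...     ['I', 'love', 'computer', 'science'],
...     ['computer', 'science', 'is', 'my', 'passion'],
... ]
>>> m = PhrasesTransformer(min_count=1, threshold=1).fit(texts)
>>> m.transform([['computer', 'science', 'rocks']])[0]
['computer_science', 'rocks']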
diff --git a/gensim/test/test_data/phrases-transformer-new-v3-5-0.pkl b/gensim/test/test_data/phrases-transformer-new-v3-5-0.pkl
deleted file mode 100644
index 7799418058..0000000000
Binary files a/gensim/test/test_data/phrases-transformer-new-v3-5-0.pkl and /dev/null differ
diff --git a/gensim/test/test_data/phrases-transformer-v3-5-0.pkl b/gensim/test/test_data/phrases-transformer-v3-5-0.pkl
deleted file mode 100644
index 8ffef6763b..0000000000
Binary files a/gensim/test/test_data/phrases-transformer-v3-5-0.pkl and /dev/null differ
diff --git a/gensim/test/test_phrases.py b/gensim/test/test_phrases.py
index ed85fea2b5..9c7a73cae4 100644
--- a/gensim/test/test_phrases.py
+++ b/gensim/test/test_phrases.py
@@ -4,140 +4,97 @@
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html
"""
-Automated tests for checking transformation algorithms (the models package).
+Automated tests for the phrase detection module.
"""
-
import logging
import unittest
-import six
import numpy as np
-from gensim.utils import to_unicode
-from gensim.models.phrases import SentenceAnalyzer, Phrases, Phraser
-from gensim.models.phrases import pseudocorpus, original_scorer
+from gensim.models.phrases import Phrases, FrozenPhrases, _PhrasesTransformation
+from gensim.models.phrases import original_scorer
from gensim.test.utils import common_texts, temporary_file, datapath
-class TestUtils(unittest.TestCase):
-
- def test_pseudocorpus_no_common_terms(self):
- vocab = [
- "prime_minister",
- "gold",
- "chief_technical_officer",
- "effective"]
- result = list(pseudocorpus(vocab, "_"))
- self.assertEqual(
- result,
- [["prime", "minister"],
- ["chief", "technical_officer"],
- ["chief_technical", "officer"]])
-
- def test_pseudocorpus_with_common_terms(self):
- vocab = [
- "hall_of_fame",
- "gold",
- "chief_of_political_bureau",
- "effective",
- "beware_of_the_dog_in_the_yard"]
- common_terms = frozenset(["in", "the", "of"])
- result = list(pseudocorpus(vocab, "_", common_terms=common_terms))
- self.assertEqual(
- result,
- [["hall", "of", "fame"],
- ["chief", "of", "political_bureau"],
- ["chief_of_political", "bureau"],
- ["beware", "of", "the", "dog_in_the_yard"],
- ["beware_of_the_dog", "in", "the", "yard"]])
-
-
class TestPhraseAnalysis(unittest.TestCase):
- class AnalysisTester(SentenceAnalyzer):
+ class AnalysisTester(_PhrasesTransformation):
- def __init__(self, scores):
+ def __init__(self, scores, threshold):
+ super().__init__(common_terms={"a", "the", "with", "of"})
self.scores = scores
+ self.threshold = threshold
- def score_item(self, worda, wordb, components, scorer):
- """Override for test purpose"""
- if worda is not None and wordb is not None:
- bigram_word = b"_".join(components)
- return self.scores.get(bigram_word, -1)
- else:
- return -1
-
- def analyze(self, scores, sentence):
- analyzer = self.AnalysisTester(scores)
- return list(analyzer.analyze_sentence(
- sentence,
- threshold=1,
- common_terms={b"a", b"the", b"with", b"of"},
- scorer=None))
-
- def analyze_words(self, scores, sentence):
- result = (
- w if isinstance(w, (tuple, list)) else [w]
- for w, score in self.analyze(scores, sentence))
- return [b"_".join(w).decode("utf-8") for w in result]
+ def score_candidate(self, word_a, word_b, in_between):
+ phrase = "_".join([word_a] + in_between + [word_b])
+ score = self.scores.get(phrase, -1)
+ if score > self.threshold:
+ return phrase, score
+ return None, None
def test_simple_analysis(self):
- s = ["simple", "sentence", "should", "pass"]
- result = self.analyze_words({}, s)
- self.assertEqual(result, s)
- s = ["a", "simple", "sentence", "with", "no", "bigram", "but", "common", "terms"]
- result = self.analyze_words({}, s)
- self.assertEqual(result, s)
+ """Test transformation with no phrases."""
+ sentence = ["simple", "sentence", "should", "pass"]
+ result = self.AnalysisTester({}, threshold=1)[sentence]
+ self.assertEqual(result, sentence)
+ sentence = ["a", "simple", "sentence", "with", "no", "bigram", "but", "common", "terms"]
+ result = self.AnalysisTester({}, threshold=1)[sentence]
+ self.assertEqual(result, sentence)
def test_analysis_bigrams(self):
scores = {
- b"simple_sentence": 2, b"sentence_many": 2,
- b"many_possible": 2, b"possible_bigrams": 2}
- s = ["simple", "sentence", "many", "possible", "bigrams"]
- result = self.analyze_words(scores, s)
+ "simple_sentence": 2, "sentence_many": 2,
+ "many_possible": 2, "possible_bigrams": 2,
+ }
+ sentence = ["simple", "sentence", "many", "possible", "bigrams"]
+ result = self.AnalysisTester(scores, threshold=1)[sentence]
self.assertEqual(result, ["simple_sentence", "many_possible", "bigrams"])
- s = ["some", "simple", "sentence", "many", "bigrams"]
- result = self.analyze_words(scores, s)
+ sentence = ["some", "simple", "sentence", "many", "bigrams"]
+ result = self.AnalysisTester(scores, threshold=1)[sentence]
self.assertEqual(result, ["some", "simple_sentence", "many", "bigrams"])
- s = ["some", "unrelated", "simple", "words"]
- result = self.analyze_words(scores, s)
- self.assertEqual(result, s)
+ sentence = ["some", "unrelated", "simple", "words"]
+ result = self.AnalysisTester(scores, threshold=1)[sentence]
+ self.assertEqual(result, sentence)
def test_analysis_common_terms(self):
scores = {
- b"simple_sentence": 2, b"sentence_many": 2,
- b"many_possible": 2, b"possible_bigrams": 2}
- s = ["a", "simple", "sentence", "many", "the", "possible", "bigrams"]
- result = self.analyze_words(scores, s)
+ "simple_sentence": 2, "sentence_many": 2,
+ "many_possible": 2, "possible_bigrams": 2,
+ }
+ sentence = ["a", "simple", "sentence", "many", "the", "possible", "bigrams"]
+ result = self.AnalysisTester(scores, threshold=1)[sentence]
self.assertEqual(result, ["a", "simple_sentence", "many", "the", "possible_bigrams"])
- s = ["simple", "the", "sentence", "and", "many", "possible", "bigrams", "with", "a"]
- result = self.analyze_words(scores, s)
- self.assertEqual(result, [
- "simple", "the", "sentence", "and", "many_possible", "bigrams", "with", "a"])
+ sentence = ["simple", "the", "sentence", "and", "many", "possible", "bigrams", "with", "a"]
+ result = self.AnalysisTester(scores, threshold=1)[sentence]
+ self.assertEqual(
+ result,
+ ["simple", "the", "sentence", "and", "many_possible", "bigrams", "with", "a"],
+ )
def test_analysis_common_terms_in_between(self):
scores = {
- b"simple_sentence": 2, b"sentence_with_many": 2,
- b"many_possible": 2, b"many_of_the_possible": 2, b"possible_bigrams": 2}
- s = ["sentence", "with", "many", "possible", "bigrams"]
- result = self.analyze_words(scores, s)
+ "simple_sentence": 2, "sentence_with_many": 2,
+ "many_possible": 2, "many_of_the_possible": 2, "possible_bigrams": 2,
+ }
+ sentence = ["sentence", "with", "many", "possible", "bigrams"]
+ result = self.AnalysisTester(scores, threshold=1)[sentence]
self.assertEqual(result, ["sentence_with_many", "possible_bigrams"])
- s = ["a", "simple", "sentence", "with", "many", "of", "the", "possible", "bigrams", "with"]
- result = self.analyze_words(scores, s)
+ sentence = ["a", "simple", "sentence", "with", "many", "of", "the", "possible", "bigrams", "with"]
+ result = self.AnalysisTester(scores, threshold=1)[sentence]
self.assertEqual(
result, ["a", "simple_sentence", "with", "many_of_the_possible", "bigrams", "with"])
class PhrasesData:
+
sentences = common_texts + [
- ['graph', 'minors', 'survey', 'human', 'interface']
+ ['graph', 'minors', 'survey', 'human', 'interface'],
]
- unicode_sentences = [[to_unicode(w) for w in sentence] for sentence in sentences]
common_terms = frozenset()
bigram1 = u'response_time'
@@ -148,24 +105,18 @@ def gen_sentences(self):
return ((w for w in sentence) for sentence in self.sentences)
-class PhrasesCommon:
- """ Tests that need to be run for both Phrases and Phraser classes."""
+class PhrasesCommon(PhrasesData):
+ """Tests for both Phrases and FrozenPhrases classes."""
def setUp(self):
- self.bigram = Phrases(
- self.sentences, min_count=1, threshold=1, common_terms=self.common_terms)
- self.bigram_default = Phrases(
- self.sentences, common_terms=self.common_terms)
- self.bigram_utf8 = Phrases(
- self.sentences, min_count=1, threshold=1, common_terms=self.common_terms)
- self.bigram_unicode = Phrases(
- self.unicode_sentences, min_count=1, threshold=1, common_terms=self.common_terms)
+ self.bigram = Phrases(self.sentences, min_count=1, threshold=1, common_terms=self.common_terms)
+ self.bigram_default = Phrases(self.sentences, common_terms=self.common_terms)
def testEmptyPhrasifiedSentencesIterator(self):
bigram_phrases = Phrases(self.sentences)
- bigram_phraser = Phraser(bigram_phrases)
+ bigram_phraser = FrozenPhrases(bigram_phrases)
trigram_phrases = Phrases(bigram_phraser[self.sentences])
- trigram_phraser = Phraser(trigram_phrases)
+ trigram_phraser = FrozenPhrases(trigram_phrases)
trigrams = trigram_phraser[bigram_phraser[self.sentences]]
fst, snd = list(trigrams), list(trigrams)
self.assertEqual(fst, snd)
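The iterator test above doubles as a recipe for trigrams: train a bigram model, freeze it, and feed the transformed corpus into a second ``Phrases``. A sketch (corpus and thresholds illustrative)::

    from gensim.models.phrases import Phrases, FrozenPhrases

    sentences = [['new', 'york', 'city', 'hall'], ['new', 'york', 'city']] * 3
    bigram = FrozenPhrases(Phrases(sentences, min_count=1, threshold=0.5))
    trigram = FrozenPhrases(Phrases(bigram[sentences], min_count=1, threshold=0.5))
    print(trigram[bigram[['new', 'york', 'city', 'hall']]])  # e.g. ['new_york_city_hall']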
@@ -187,22 +138,27 @@ def testEmptyInputsOnBigramConstruction(self):
def testSentenceGeneration(self):
"""Test basic bigram using a dummy corpus."""
# test that we generate the same amount of sentences as the input
- self.assertEqual(len(self.sentences), len(list(self.bigram_default[self.sentences])))
+ self.assertEqual(
+ len(self.sentences),
+ len(list(self.bigram_default[self.sentences])),
+ )
def testSentenceGenerationWithGenerator(self):
"""Test basic bigram production when corpus is a generator."""
- self.assertEqual(len(list(self.gen_sentences())),
- len(list(self.bigram_default[self.gen_sentences()])))
+ self.assertEqual(
+ len(list(self.gen_sentences())),
+ len(list(self.bigram_default[self.gen_sentences()])),
+ )
def testBigramConstruction(self):
- """Test Phrases bigram construction building."""
+ """Test Phrases bigram construction."""
# with this setting we should get response_time and graph_minors
bigram1_seen = False
bigram2_seen = False
- for s in self.bigram[self.sentences]:
- if not bigram1_seen and self.bigram1 in s:
+ for sentence in self.bigram[self.sentences]:
+ if not bigram1_seen and self.bigram1 in sentence:
bigram1_seen = True
- if not bigram2_seen and self.bigram2 in s:
+ if not bigram2_seen and self.bigram2 in sentence:
bigram2_seen = True
if bigram1_seen and bigram2_seen:
break
@@ -218,7 +174,7 @@ def testBigramConstruction(self):
self.assertTrue(self.bigram3 in self.bigram[self.sentences[-1]])
def testBigramConstructionFromGenerator(self):
- """Test Phrases bigram construction building when corpus is a generator"""
+ """Test Phrases bigram construction building when corpus is a generator."""
bigram1_seen = False
bigram2_seen = False
@@ -232,7 +188,7 @@ def testBigramConstructionFromGenerator(self):
self.assertTrue(bigram1_seen and bigram2_seen)
def testBigramConstructionFromArray(self):
- """Test Phrases bigram construction building when corpus is a numpy array"""
+ """Test Phrases bigram construction building when corpus is a numpy array."""
bigram1_seen = False
bigram2_seen = False
@@ -245,16 +201,6 @@ def testBigramConstructionFromArray(self):
break
self.assertTrue(bigram1_seen and bigram2_seen)
- def testEncoding(self):
- """Test that both utf8 and unicode input work; output must be unicode."""
- expected = [u'survey', u'user', u'computer', u'system', u'response_time']
-
- self.assertEqual(self.bigram_utf8[self.sentences[1]], expected)
- self.assertEqual(self.bigram_unicode[self.sentences[1]], expected)
-
- transformed = ' '.join(self.bigram_utf8[self.sentences[1]])
- self.assertTrue(isinstance(transformed, six.text_type))
-
# scorer for testCustomScorer
# the function is outside of the test class to keep the custom scorer picklable
@@ -264,43 +210,32 @@ def dumb_scorer(worda_count, wordb_count, bigram_count, len_vocab, min_count, co
return 1
-class TestPhrasesModel(PhrasesData, PhrasesCommon, unittest.TestCase):
+class TestPhrasesModel(PhrasesCommon, unittest.TestCase):
def testExportPhrases(self):
- """Test Phrases bigram export_phrases functionality."""
- bigram = Phrases(self.sentences, min_count=1, threshold=1)
-
- seen_bigrams = set()
-
- for phrase, score in bigram.export_phrases(self.sentences):
- seen_bigrams.add(phrase)
+ """Test Phrases bigram export phrases."""
+ bigram = Phrases(self.sentences, min_count=1, threshold=1, delimiter=' ')
+ seen_bigrams = set(bigram.export_phrases(self.sentences).keys())
assert seen_bigrams == {
- b'response time',
- b'graph minors',
- b'human interface',
+ 'response time',
+ 'graph minors',
+ 'human interface',
}
def testMultipleBigramsSingleEntry(self):
- """ a single entry should produce multiple bigrams. """
- bigram = Phrases(self.sentences, min_count=1, threshold=1)
- seen_bigrams = set()
-
+ """Test a single entry produces multiple bigrams."""
+ bigram = Phrases(self.sentences, min_count=1, threshold=1, delimiter=' ')
test_sentences = [['graph', 'minors', 'survey', 'human', 'interface']]
- for phrase, score in bigram.export_phrases(test_sentences):
- seen_bigrams.add(phrase)
+ seen_bigrams = set(bigram.export_phrases(test_sentences).keys())
- assert seen_bigrams == {b'graph minors', b'human interface'}
+ assert seen_bigrams == {'graph minors', 'human interface'}
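These tests also capture the behavioural change to ``export_phrases()``: it now returns a dict mapping each detected phrase (a ``str``, joined with the configured ``delimiter``) to its score, instead of yielding ``(bytes, float)`` tuples. A sketch of the new call (data illustrative)::

    from gensim.models.phrases import Phrases

    sentences = [['graph', 'minors', 'survey'], ['graph', 'minors', 'human']]
    bigram = Phrases(sentences, min_count=1, threshold=1, delimiter=' ')
    for phrase, score in bigram.export_phrases(sentences).items():  # dict now
        print(phrase, round(score, 3))  # e.g. "graph minors 1.75"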
def testScoringDefault(self):
- """ test the default scoring, from the mikolov word2vec paper """
- bigram = Phrases(self.sentences, min_count=1, threshold=1)
-
- seen_scores = set()
-
+ """Test the default scoring, from the mikolov word2vec paper."""
+ bigram = Phrases(self.sentences, min_count=1, threshold=1, delimiter=' ')
test_sentences = [['graph', 'minors', 'survey', 'human', 'interface']]
- for phrase, score in bigram.export_phrases(test_sentences):
- seen_scores.add(round(score, 3))
+ seen_scores = set(round(score, 3) for score in bigram.export_phrases(test_sentences).values())
assert seen_scores == {
5.167, # score for graph minors
@@ -308,22 +243,18 @@ def testScoringDefault(self):
}
def test__getitem__(self):
- """ test Phrases[sentences] with a single sentence"""
+ """Test Phrases[sentences] with a single sentence."""
bigram = Phrases(self.sentences, min_count=1, threshold=1)
- # pdb.set_trace()
test_sentences = [['graph', 'minors', 'survey', 'human', 'interface']]
phrased_sentence = next(bigram[test_sentences].__iter__())
assert phrased_sentence == ['graph_minors', 'survey', 'human_interface']
def testScoringNpmi(self):
- """ test normalized pointwise mutual information scoring """
+ """Test normalized pointwise mutual information scoring."""
bigram = Phrases(self.sentences, min_count=1, threshold=.5, scoring='npmi')
-
- seen_scores = set()
test_sentences = [['graph', 'minors', 'survey', 'human', 'interface']]
- for phrase, score in bigram.export_phrases(test_sentences):
- seen_scores.add(round(score, 3))
+ seen_scores = set(round(score, 3) for score in bigram.export_phrases(test_sentences).values())
assert seen_scores == {
.882, # score for graph minors
@@ -331,16 +262,12 @@ def testScoringNpmi(self):
}
def testCustomScorer(self):
- """ test using a custom scoring function """
-
+ """Test using a custom scoring function."""
bigram = Phrases(self.sentences, min_count=1, threshold=.001, scoring=dumb_scorer)
-
- seen_scores = []
test_sentences = [['graph', 'minors', 'survey', 'human', 'interface', 'system']]
- for phrase, score in bigram.export_phrases(test_sentences):
- seen_scores.append(score)
+ seen_scores = list(bigram.export_phrases(test_sentences).values())
- assert all(seen_scores) # all scores 1
+ assert all(score == 1 for score in seen_scores)
assert len(seen_scores) == 3 # 'graph minors' and 'survey human' and 'interface system'
def testBadParameters(self):
@@ -361,31 +288,25 @@ def testPruning(self):
class TestPhrasesPersistence(PhrasesData, unittest.TestCase):
def testSaveLoadCustomScorer(self):
- """ saving and loading a Phrases object with a custom scorer """
-
+ """Test saving and loading a Phrases object with a custom scorer."""
with temporary_file("test.pkl") as fpath:
bigram = Phrases(self.sentences, min_count=1, threshold=.001, scoring=dumb_scorer)
bigram.save(fpath)
bigram_loaded = Phrases.load(fpath)
- seen_scores = []
test_sentences = [['graph', 'minors', 'survey', 'human', 'interface', 'system']]
- for phrase, score in bigram_loaded.export_phrases(test_sentences):
- seen_scores.append(score)
+ seen_scores = list(bigram_loaded.export_phrases(test_sentences).values())
- assert all(seen_scores) # all scores 1
+ assert all(score == 1 for score in seen_scores)
assert len(seen_scores) == 3 # 'graph minors' and 'survey human' and 'interface system'
def testSaveLoad(self):
- """ Saving and loading a Phrases object."""
-
+ """Test saving and loading a Phrases object."""
with temporary_file("test.pkl") as fpath:
bigram = Phrases(self.sentences, min_count=1, threshold=1)
bigram.save(fpath)
bigram_loaded = Phrases.load(fpath)
- seen_scores = set()
test_sentences = [['graph', 'minors', 'survey', 'human', 'interface', 'system']]
- for phrase, score in bigram_loaded.export_phrases(test_sentences):
- seen_scores.add(round(score, 3))
+ seen_scores = set(round(score, 3) for score in bigram_loaded.export_phrases(test_sentences).values())
assert seen_scores == set([
5.167, # score for graph minors
@@ -393,13 +314,10 @@ def testSaveLoad(self):
])
def testSaveLoadStringScoring(self):
- """ Saving and loading a Phrases object with a string scoring parameter.
- This should ensure backwards compatibility with the previous version of Phrases"""
+ """Test backwards compatibility with a previous version of Phrases with custom scoring."""
bigram_loaded = Phrases.load(datapath("phrases-scoring-str.pkl"))
- seen_scores = set()
test_sentences = [['graph', 'minors', 'survey', 'human', 'interface', 'system']]
- for phrase, score in bigram_loaded.export_phrases(test_sentences):
- seen_scores.add(round(score, 3))
+ seen_scores = set(round(score, 3) for score in bigram_loaded.export_phrases(test_sentences).values())
assert seen_scores == set([
5.167, # score for graph minors
@@ -407,14 +325,10 @@ def testSaveLoadStringScoring(self):
])
def testSaveLoadNoScoring(self):
- """ Saving and loading a Phrases object with no scoring parameter.
- This should ensure backwards compatibility with old versions of Phrases"""
-
+ """Test backwards compatibility with old versions of Phrases with no scoring parameter."""
bigram_loaded = Phrases.load(datapath("phrases-no-scoring.pkl"))
- seen_scores = set()
test_sentences = [['graph', 'minors', 'survey', 'human', 'interface', 'system']]
- for phrase, score in bigram_loaded.export_phrases(test_sentences):
- seen_scores.add(round(score, 3))
+ seen_scores = set(round(score, 3) for score in bigram_loaded.export_phrases(test_sentences).values())
assert seen_scores == set([
5.167, # score for graph minors
@@ -426,77 +340,67 @@ def testSaveLoadNoCommonTerms(self):
bigram_loaded = Phrases.load(datapath("phrases-no-common-terms.pkl"))
self.assertEqual(bigram_loaded.common_terms, frozenset())
# can make a phraser, cf #1751
- phraser = Phraser(bigram_loaded) # does not raise
+ phraser = FrozenPhrases(bigram_loaded) # does not raise
phraser[["human", "interface", "survey"]] # does not raise
-class TestPhraserPersistence(PhrasesData, unittest.TestCase):
+class TestFrozenPhrasesPersistence(PhrasesData, unittest.TestCase):
def testSaveLoadCustomScorer(self):
- """Saving and loading a Phraser object with a custom scorer """
+ """Test saving and loading a FrozenPhrases object with a custom scorer."""
with temporary_file("test.pkl") as fpath:
- bigram = Phraser(
+ bigram = FrozenPhrases(
Phrases(self.sentences, min_count=1, threshold=.001, scoring=dumb_scorer))
bigram.save(fpath)
- bigram_loaded = Phraser.load(fpath)
- # we do not much with scoring, just verify its the one expected
+ bigram_loaded = FrozenPhrases.load(fpath)
self.assertEqual(bigram_loaded.scoring, dumb_scorer)
def testSaveLoad(self):
- """ Saving and loading a Phraser object."""
+ """Test saving and loading a FrozenPhrases object."""
with temporary_file("test.pkl") as fpath:
- bigram = Phraser(Phrases(self.sentences, min_count=1, threshold=1))
+ bigram = FrozenPhrases(Phrases(self.sentences, min_count=1, threshold=1))
bigram.save(fpath)
- bigram_loaded = Phraser.load(fpath)
+ bigram_loaded = FrozenPhrases.load(fpath)
self.assertEqual(
bigram_loaded[['graph', 'minors', 'survey', 'human', 'interface', 'system']],
['graph_minors', 'survey', 'human_interface', 'system'])
def testSaveLoadStringScoring(self):
- """ Saving and loading a Phraser object with a string scoring parameter.
- This should ensure backwards compatibility with the previous version of Phraser"""
- bigram_loaded = Phraser.load(datapath("phraser-scoring-str.pkl"))
+ """Test saving and loading a FrozenPhrases object with a string scoring parameter.
+ This should ensure backwards compatibility with the previous version of FrozenPhrases"""
+ bigram_loaded = FrozenPhrases.load(datapath("phraser-scoring-str.pkl"))
# we don't do much with scoring here, just verify it's the one expected
self.assertEqual(bigram_loaded.scoring, original_scorer)
def testSaveLoadNoScoring(self):
- """ Saving and loading a Phraser object with no scoring parameter.
- This should ensure backwards compatibility with old versions of Phraser"""
- bigram_loaded = Phraser.load(datapath("phraser-no-scoring.pkl"))
+ """Test saving and loading a FrozenPhrases object with no scoring parameter.
+ This should ensure backwards compatibility with old versions of FrozenPhrases"""
+ bigram_loaded = FrozenPhrases.load(datapath("phraser-no-scoring.pkl"))
# we don't do much with scoring here, just verify it's the one expected
self.assertEqual(bigram_loaded.scoring, original_scorer)
def testSaveLoadNoCommonTerms(self):
- """ Ensure backwards compatibility with old versions of Phraser, before common_terms"""
- bigram_loaded = Phraser.load(datapath("phraser-no-common-terms.pkl"))
+ """Ensure backwards compatibility with old versions of FrozenPhrases, before common_terms."""
+ bigram_loaded = FrozenPhrases.load(datapath("phraser-no-common-terms.pkl"))
self.assertEqual(bigram_loaded.common_terms, frozenset())
-class TestPhraserModel(PhrasesData, PhrasesCommon, unittest.TestCase):
- """ Test Phraser models."""
+class TestFrozenPhrasesModel(PhrasesCommon, unittest.TestCase):
+ """Test FrozenPhrases models."""
def setUp(self):
- """Set up Phraser models for the tests."""
+ """Set up FrozenPhrases models for the tests."""
bigram_phrases = Phrases(
self.sentences, min_count=1, threshold=1, common_terms=self.common_terms)
- self.bigram = Phraser(bigram_phrases)
+ self.bigram = FrozenPhrases(bigram_phrases)
bigram_default_phrases = Phrases(self.sentences, common_terms=self.common_terms)
- self.bigram_default = Phraser(bigram_default_phrases)
-
- bigram_utf8_phrases = Phrases(
- self.sentences, min_count=1, threshold=1, common_terms=self.common_terms)
- self.bigram_utf8 = Phraser(bigram_utf8_phrases)
-
- bigram_unicode_phrases = Phrases(
- self.unicode_sentences, min_count=1, threshold=1, common_terms=self.common_terms)
- self.bigram_unicode = Phraser(bigram_unicode_phrases)
+ self.bigram_default = FrozenPhrases(bigram_default_phrases)
class CommonTermsPhrasesData:
- """This mixin permits to reuse the test, using, this time the common_terms option
- """
+ """This mixin permits to reuse tests with the common_terms option."""
sentences = [
['human', 'interface', 'with', 'computer'],
@@ -510,7 +414,6 @@ class CommonTermsPhrasesData:
['data', 'and', 'graph', 'survey'],
['data', 'and', 'graph', 'survey', 'for', 'human', 'interface'] # test bigrams within same sentence
]
- unicode_sentences = [[to_unicode(w) for w in sentence] for sentence in sentences]
common_terms = ['of', 'and', 'for']
bigram1 = u'lack_of_interest'
@@ -527,63 +430,43 @@ def gen_sentences(self):
class TestPhrasesModelCommonTerms(CommonTermsPhrasesData, TestPhrasesModel):
"""Test Phrases models with common terms"""
- def testEncoding(self):
- """Test that both utf8 and unicode input work; output must be unicode."""
- expected = [u'survey', u'of', u'user', u'computer', u'system', u'lack_of_interest']
-
- self.assertEqual(self.bigram_utf8[self.sentences[1]], expected)
- self.assertEqual(self.bigram_unicode[self.sentences[1]], expected)
-
- transformed = ' '.join(self.bigram_utf8[self.sentences[1]])
- self.assertTrue(isinstance(transformed, six.text_type))
-
def testMultipleBigramsSingleEntry(self):
- """ a single entry should produce multiple bigrams. """
- bigram = Phrases(self.sentences, min_count=1, threshold=1, common_terms=self.common_terms)
-
- seen_bigrams = set()
+ """Test a single entry produces multiple bigrams."""
+ bigram = Phrases(self.sentences, min_count=1, threshold=1, common_terms=self.common_terms, delimiter=' ')
test_sentences = [['data', 'and', 'graph', 'survey', 'for', 'human', 'interface']]
- for phrase, score in bigram.export_phrases(test_sentences):
- seen_bigrams.add(phrase)
+ seen_bigrams = set(bigram.export_phrases(test_sentences).keys())
+
assert seen_bigrams == set([
- b'data and graph',
- b'human interface',
+ 'data and graph',
+ 'human interface',
])
def testExportPhrases(self):
- """Test Phrases bigram export_phrases functionality."""
- bigram = Phrases(self.sentences, min_count=1, threshold=1, common_terms=self.common_terms)
-
- seen_bigrams = set()
-
- for phrase, score in bigram.export_phrases(self.sentences):
- seen_bigrams.add(phrase)
+ """Test Phrases bigram export phrases."""
+ bigram = Phrases(self.sentences, min_count=1, threshold=1, common_terms=self.common_terms, delimiter=' ')
+ seen_bigrams = set(bigram.export_phrases(self.sentences).keys())
assert seen_bigrams == set([
- b'human interface',
- b'graph of trees',
- b'data and graph',
- b'lack of interest',
+ 'human interface',
+ 'graph of trees',
+ 'data and graph',
+ 'lack of interest',
])
def testScoringDefault(self):
""" test the default scoring, from the mikolov word2vec paper """
bigram = Phrases(self.sentences, min_count=1, threshold=1, common_terms=self.common_terms)
-
- seen_scores = set()
-
test_sentences = [['data', 'and', 'graph', 'survey', 'for', 'human', 'interface']]
- for phrase, score in bigram.export_phrases(test_sentences):
- seen_scores.add(round(score, 3))
+ seen_scores = set(round(score, 3) for score in bigram.export_phrases(test_sentences).values())
min_count = float(bigram.min_count)
len_vocab = float(len(bigram.vocab))
- graph = float(bigram.vocab[b"graph"])
- data = float(bigram.vocab[b"data"])
- data_and_graph = float(bigram.vocab[b"data_and_graph"])
- human = float(bigram.vocab[b"human"])
- interface = float(bigram.vocab[b"interface"])
- human_interface = float(bigram.vocab[b"human_interface"])
+ graph = float(bigram.vocab["graph"])
+ data = float(bigram.vocab["data"])
+ data_and_graph = float(bigram.vocab["data_and_graph"])
+ human = float(bigram.vocab["human"])
+ interface = float(bigram.vocab["interface"])
+ human_interface = float(bigram.vocab["human_interface"])
assert seen_scores == set([
# score for data and graph
@@ -593,15 +476,13 @@ def testScoringDefault(self):
])
def testScoringNpmi(self):
- """ test normalized pointwise mutual information scoring """
- bigram = Phrases(self.sentences, min_count=1, threshold=.5,
- scoring='npmi', common_terms=self.common_terms)
-
- seen_scores = set()
-
+ """Test normalized pointwise mutual information scoring."""
+ bigram = Phrases(
+ self.sentences, min_count=1, threshold=.5,
+ scoring='npmi', common_terms=self.common_terms,
+ )
test_sentences = [['data', 'and', 'graph', 'survey', 'for', 'human', 'interface']]
- for phrase, score in bigram.export_phrases(test_sentences):
- seen_scores.add(round(score, 3))
+ seen_scores = set(round(score, 3) for score in bigram.export_phrases(test_sentences).values())
assert seen_scores == set([
.74, # score for data and graph
@@ -609,56 +490,35 @@ def testScoringNpmi(self):
])
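NPMI normalises scores into the [-1, 1] range, which is why these tests use a much lower threshold than the default scorer would need. A small sketch (data illustrative)::

    from gensim.models.phrases import Phrases

    sentences = [['data', 'and', 'graph'], ['data', 'and', 'graph', 'survey']]
    bigram = Phrases(
        sentences, min_count=1, threshold=0.5,
        scoring='npmi', common_terms=frozenset(['and']),
    )
    print(bigram[['data', 'and', 'graph', 'survey']])  # e.g. ['data_and_graph', 'survey']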
def testCustomScorer(self):
- """ test using a custom scoring function """
-
- bigram = Phrases(self.sentences, min_count=1, threshold=.001,
- scoring=dumb_scorer, common_terms=self.common_terms)
-
- seen_scores = []
+ """Test using a custom scoring function."""
+ bigram = Phrases(
+ self.sentences, min_count=1, threshold=.001,
+ scoring=dumb_scorer, common_terms=self.common_terms,
+ )
test_sentences = [['data', 'and', 'graph', 'survey', 'for', 'human', 'interface']]
- for phrase, score in bigram.export_phrases(test_sentences):
- seen_scores.append(score)
+ seen_scores = list(bigram.export_phrases(test_sentences).values())
assert all(seen_scores) # all scores 1
assert len(seen_scores) == 2 # 'data and graph' 'survey for human'
def test__getitem__(self):
- """ test Phrases[sentences] with a single sentence"""
+ """Test Phrases[sentences] with a single sentence."""
bigram = Phrases(self.sentences, min_count=1, threshold=1, common_terms=self.common_terms)
- # pdb.set_trace()
test_sentences = [['data', 'and', 'graph', 'survey', 'for', 'human', 'interface']]
phrased_sentence = next(bigram[test_sentences].__iter__())
assert phrased_sentence == ['data_and_graph', 'survey', 'for', 'human_interface']
-class TestPhraserModelCommonTerms(CommonTermsPhrasesData, TestPhraserModel):
-
- def testEncoding(self):
- """Test that both utf8 and unicode input work; output must be unicode."""
- expected = [u'survey', u'of', u'user', u'computer', u'system', u'lack_of_interest']
-
- self.assertEqual(self.bigram_utf8[self.sentences[1]], expected)
- self.assertEqual(self.bigram_unicode[self.sentences[1]], expected)
-
- transformed = ' '.join(self.bigram_utf8[self.sentences[1]])
- self.assertTrue(isinstance(transformed, six.text_type))
-
-
-class TestPhraserModelCompatibilty(unittest.TestCase):
+class TestFrozenPhrasesModelCompatibility(unittest.TestCase):
def testCompatibilty(self):
- phr = Phraser.load(datapath("phraser-3.6.0.model"))
- model = Phrases.load(datapath("phrases-3.6.0.model"))
-
+ phrases = Phrases.load(datapath("phrases-3.6.0.model"))
+ phraser = FrozenPhrases.load(datapath("phraser-3.6.0.model"))
test_sentences = ['trees', 'graph', 'minors']
- expected_res = ['trees', 'graph_minors']
-
- phr_out = phr[test_sentences]
- model_out = model[test_sentences]
- self.assertEqual(phr_out, expected_res)
- self.assertEqual(model_out, expected_res)
+ self.assertEqual(phrases[test_sentences], ['trees', 'graph_minors'])
+ self.assertEqual(phraser[test_sentences], ['trees', 'graph_minors'])
if __name__ == '__main__':
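The compatibility test relies on the renamed classes still unpickling models saved under the old names; a sketch of the load path, using the 3.6.0 fixtures referenced above::

    from gensim.models.phrases import Phrases, FrozenPhrases
    from gensim.test.utils import datapath

    # Models pickled by gensim 3.6.0 under the old class names still load.
    phrases = Phrases.load(datapath("phrases-3.6.0.model"))
    frozen = FrozenPhrases.load(datapath("phraser-3.6.0.model"))  # saved as Phraser
    sentence = ['trees', 'graph', 'minors']
    assert phrases[sentence] == frozen[sentence] == ['trees', 'graph_minors']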
diff --git a/gensim/test/test_sklearn_api.py b/gensim/test/test_sklearn_api.py
index 9dc7d303eb..b6b9449eb4 100644
--- a/gensim/test/test_sklearn_api.py
+++ b/gensim/test/test_sklearn_api.py
@@ -1137,7 +1137,7 @@ def testPartialFit(self):
new_sentences = [
['world', 'peace', 'humans', 'world', 'peace', 'world', 'peace', 'people'],
['world', 'peace', 'people'],
- ['world', 'peace', 'humans']
+ ['world', 'peace', 'humans'],
]
self.model.partial_fit(X=new_sentences) # train model with new sentences
@@ -1182,30 +1182,6 @@ def setUp(self):
[u'the', u'bank_of_america', u'offices', u'are', u'closed']
]
- def testCompareToOld(self):
- with open(datapath("phrases-transformer-v3-5-0.pkl"), "rb") as old_phrases_transformer_pkl:
- old_phrases_transformer = pickle.load(old_phrases_transformer_pkl)
- doc = phrases_sentences[-1]
- phrase_tokens = old_phrases_transformer.transform(doc)[0]
- expected_phrase_tokens = [u'graph_minors', u'survey', u'human_interface']
- self.assertEqual(phrase_tokens, expected_phrase_tokens)
-
- self.model.fit(phrases_sentences)
- new_phrase_tokens = self.model.transform(doc)[0]
- self.assertEqual(new_phrase_tokens, phrase_tokens)
-
- def testLoadNew(self):
- with open(datapath("phrases-transformer-new-v3-5-0.pkl"), "rb") as new_phrases_transformer_pkl:
- old_phrases_transformer = pickle.load(new_phrases_transformer_pkl)
- doc = phrases_sentences[-1]
- phrase_tokens = old_phrases_transformer.transform(doc)[0]
- expected_phrase_tokens = [u'graph_minors', u'survey', u'human_interface']
- self.assertEqual(phrase_tokens, expected_phrase_tokens)
-
- self.model.fit(phrases_sentences)
- new_phrase_tokens = self.model.transform(doc)[0]
- self.assertEqual(new_phrase_tokens, phrase_tokens)
-
def testFitAndTransform(self):
self.model.fit(phrases_w_common_terms)
@@ -1247,10 +1223,7 @@ def testPartialFit(self):
self.assertEqual(transformed_2, expected_transformations_2)
-# specifically test pluggable scoring in Phrases, because possible pickling issues with function parameter
-
-# this is intentionally in main rather than a class method to support pickling
-# all scores will be 1
+# For testing pluggable scoring in Phrases; kept at module level so it stays picklable.
def dumb_scorer(worda_count, wordb_count, bigram_count, len_vocab, min_count, corpus_word_count):
return 1
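As the comment says, a custom scorer has to live at module level so that a ``Phrases`` model holding a reference to it stays picklable; a sketch of plugging one in (the scorer logic here is illustrative)::

    from gensim.models.phrases import Phrases

    def count_scorer(worda_count, wordb_count, bigram_count,
                     len_vocab, min_count, corpus_word_count):
        """Score a candidate purely by how often the bigram occurred."""
        return bigram_count

    sentences = [['graph', 'minors', 'survey']] * 5
    bigram = Phrases(sentences, min_count=1, threshold=2, scoring=count_scorer)
    bigram.save('bigram.pkl')  # works because count_scorer is importable
    print(Phrases.load('bigram.pkl')[['graph', 'minors', 'survey']])  # e.g. ['graph_minors', 'survey']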