From 5342153eb4f4b02bb45bfa3951eef8250ac9f6b6 Mon Sep 17 00:00:00 2001 From: Menshikh Ivan Date: Thu, 1 Feb 2018 10:39:25 +0500 Subject: [PATCH] Remove outdated `bz2` + `MmCorpus` examples from tutorials (#1867) --- docs/src/dist_lsi.rst | 3 +-- docs/src/wiki.rst | 6 ++---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/docs/src/dist_lsi.rst b/docs/src/dist_lsi.rst index d221c330e9..e80ca3809d 100644 --- a/docs/src/dist_lsi.rst +++ b/docs/src/dist_lsi.rst @@ -120,14 +120,13 @@ Distributed LSA on Wikipedia First, download and prepare the Wikipedia corpus as per :doc:`wiki`, then load the corpus iterator with:: - >>> import logging, gensim, bz2 + >>> import logging, gensim >>> logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) >>> # load id->word mapping (the dictionary) >>> id2word = gensim.corpora.Dictionary.load_from_text('wiki_en_wordids.txt') >>> # load corpus iterator >>> mm = gensim.corpora.MmCorpus('wiki_en_tfidf.mm') - >>> # mm = gensim.corpora.MmCorpus(bz2.BZ2File('wiki_en_tfidf.mm.bz2')) # use this if you compressed the TFIDF output >>> print(mm) MmCorpus(3199665 documents, 100000 features, 495547400 non-zero entries) diff --git a/docs/src/wiki.rst b/docs/src/wiki.rst index fc6c0f9e5d..47aeaa34fd 100644 --- a/docs/src/wiki.rst +++ b/docs/src/wiki.rst @@ -38,14 +38,13 @@ Latent Semantic Analysis First let's load the corpus iterator and dictionary, created in the second step above:: - >>> import logging, gensim, bz2 + >>> import logging, gensim >>> logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) >>> # load id->word mapping (the dictionary), one of the results of step 2 above >>> id2word = gensim.corpora.Dictionary.load_from_text('wiki_en_wordids.txt') >>> # load corpus iterator >>> mm = gensim.corpora.MmCorpus('wiki_en_tfidf.mm') - >>> # mm = gensim.corpora.MmCorpus(bz2.BZ2File('wiki_en_tfidf.mm.bz2')) # use this if you compressed the TFIDF output (recommended) >>> print(mm) MmCorpus(3931787 documents, 100000 features, 756379027 non-zero entries) @@ -93,14 +92,13 @@ Latent Dirichlet Allocation As with Latent Semantic Analysis above, first load the corpus iterator and dictionary:: - >>> import logging, gensim, bz2 + >>> import logging, gensim >>> logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) >>> # load id->word mapping (the dictionary), one of the results of step 2 above >>> id2word = gensim.corpora.Dictionary.load_from_text('wiki_en_wordids.txt') >>> # load corpus iterator >>> mm = gensim.corpora.MmCorpus('wiki_en_tfidf.mm') - >>> # mm = gensim.corpora.MmCorpus(bz2.BZ2File('wiki_en_tfidf.mm.bz2')) # use this if you compressed the TFIDF output >>> print(mm) MmCorpus(3931787 documents, 100000 features, 756379027 non-zero entries)