From 6456cbcd75e6f8720451766ba31cc046b4463ae2 Mon Sep 17 00:00:00 2001
From: Andrey <akutuzov72@gmail.com>
Date: Wed, 13 Jan 2016 14:41:49 +0100
Subject: [PATCH] Hyperparameters' default values are aligned with Mikolov's
 word2vec.

---
 gensim/models/word2vec.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/gensim/models/word2vec.py b/gensim/models/word2vec.py
index a103754d10..340ff053d5 100755
--- a/gensim/models/word2vec.py
+++ b/gensim/models/word2vec.py
@@ -336,8 +336,8 @@ class Word2Vec(utils.SaveLoad):
     """
     def __init__(
             self, sentences=None, size=100, alpha=0.025, window=5, min_count=5,
-            max_vocab_size=None, sample=0, seed=1, workers=1, min_alpha=0.0001,
-            sg=1, hs=1, negative=0, cbow_mean=1, hashfxn=hash, iter=1, null_word=0,
+            max_vocab_size=None, sample=1e-3, seed=1, workers=12, min_alpha=0.0001,
+            sg=0, hs=0, negative=5, cbow_mean=1, hashfxn=hash, iter=5, null_word=0,
             trim_rule=None, sorted_vocab=1):
         """
         Initialize the model from an iterable of `sentences`. Each sentence is a
@@ -351,8 +351,8 @@ def __init__(
         If you don't supply `sentences`, the model is left uninitialized -- use if
         you plan to initialize it in some other way.
 
-        `sg` defines the training algorithm. By default (`sg=1`), skip-gram is used.
-        Otherwise, `cbow` is employed.
+        `sg` defines the training algorithm. By default (`sg=0`), CBOW is used.
+        Otherwise (`sg=1`), skip-gram is employed.
 
         `size` is the dimensionality of the feature vectors.
 
@@ -370,14 +370,15 @@ def __init__(
         need about 1GB of RAM. Set to `None` for no limit (default).
 
         `sample` = threshold for configuring which higher-frequency words are randomly downsampled;
-            default is 0 (off), useful value is 1e-5.
+            default is 1e-3, another useful value is 1e-5; set to 0 to turn downsampling off.
 
         `workers` = use this many worker threads to train the model (=faster training with multicore machines).
 
-        `hs` = if 1 (default), hierarchical sampling will be used for model training (else set to 0).
+        `hs` = if 1, hierarchical softmax will be used for model training. If set to 0 (default) and `negative` is non-zero, negative sampling will be used.
 
         `negative` = if > 0, negative sampling will be used, the int for negative
         specifies how many "noise words" should be drawn (usually between 5-20).
+        Default is 5.
 
         `cbow_mean` = if 0, use the sum of the context word vectors. If 1 (default), use the mean.
         Only applies when cbow is used.