From 465224188740e3c46f71775c098232c55340d731 Mon Sep 17 00:00:00 2001
From: kirit93 <kirit.thadaka@gmail.com>
Date: Fri, 24 Mar 2017 00:23:44 +0530
Subject: [PATCH 01/14] Added test case to check if exception is raised from
 check_output and replaced deprecated assertEquals with assertEqual

---
 gensim/test/test_utils.py | 16 ++++++++++++--
 gensim/utils.py           | 46 ++++++++++++++++++++++-----------------
 2 files changed, 40 insertions(+), 22 deletions(-)

diff --git a/gensim/test/test_utils.py b/gensim/test/test_utils.py
index 41f20eb232..eb58cfbaa7 100644
--- a/gensim/test/test_utils.py
+++ b/gensim/test/test_utils.py
@@ -79,7 +79,7 @@ def test_decode_entities(self):
         # create a string that fails to decode with unichr on narrow python builds
         body = u'It&#146;s the Year of the Horse. YES VIN DIESEL &#128588; &#128175;'
         expected = u'It\x92s the Year of the Horse. YES VIN DIESEL \U0001f64c \U0001f4af'
-        self.assertEquals(utils.decode_htmlentities(body), expected)
+        self.assertEqual(utils.decode_htmlentities(body), expected)
 
 class TestSampleDict(unittest.TestCase):
     def test_sample_dict(self):
@@ -90,7 +90,19 @@ def test_sample_dict(self):
         self.assertEqual(sampled_dict,expected_dict)
         sampled_dict_random = utils.sample_dict(d,2)
         if sampled_dict_random in expected_dict_random:
-            self.assertTrue(True)    
+            self.assertTrue(True)
+
+class TestCheckOutput(unittest.TestCase):
+    def test_check_output(self):
+        res = utils.check_output(args=["echo", "hello"])
+        self.assertEqual(res, b'hello\n')
+
+    def test_check_output_exception(self):
+        error = utils.check_output(args=['ldfs'])
+        self.assertEqual(error, "subprocess.check_output could not execute command ' ldfs '")
+
+    def test_exception(self):
+        self.assertRaises(Exception, utils.check_output(args=['pythons']))
 
 
 
diff --git a/gensim/utils.py b/gensim/utils.py
index 2d0eb90c47..271f7b1547 100644
--- a/gensim/utils.py
+++ b/gensim/utils.py
@@ -1146,28 +1146,34 @@ def keep_vocab_item(word, count, min_count, trim_rule=None):
         else:
             return default_res
 
-def check_output(stdout=subprocess.PIPE, *popenargs, **kwargs):
-    r"""Run command with arguments and return its output as a byte string.
-    Backported from Python 2.7 as it's implemented as pure python on stdlib.
-    >>> check_output(args=['/usr/bin/python', '--version'])
-    Python 2.6.2
-    Added extra KeyboardInterrupt handling
+def check_output(args, flag=True):
+    r"""
+    subprocess.check_output with the flag set to true will spawn a new shell process and execute 'args'
+    if there is an error while executing args, the error message will be displayed on stdout along with the
+    custom error message in the except block. This allows the user to receive an accurate error message if subprocess
+    fails to execute the specified command.
+    If flag is set to true, subprocess.check_output takes 'args' as a string instead of a list. To abstract the user from this,
+    this function will convert the argument list to a string if needed.
+    >>> test_checkoutput(args=['/usr/bin/python', '--version'])
+    Python 2.7.10
+    In case args generates an error
+    >>> test_checkoutput(args=['/usr/bin/pythons', '-ve']) #Incorrect argument
+    /bin/sh: /usr/bin/pythons: command not found
+    subprocess.check_output could not execute command ' /usr/bin/pythons -ve '
     """
+    if flag:
+        args = " ".join(args)
     try:
-        process = subprocess.Popen(stdout=stdout, *popenargs, **kwargs)
-        output, unused_err = process.communicate()
-        retcode = process.poll()
-        if retcode:
-            cmd = kwargs.get("args")
-            if cmd is None:
-                cmd = popenargs[0]
-            error = subprocess.CalledProcessError(retcode, cmd)
-            error.output = output
-            raise error
-        return output
-    except KeyboardInterrupt:
-        process.terminate()
-        raise
+        res = subprocess.check_output(args, shell=flag)
+        return res
+    except subprocess.CalledProcessError:
+        """
+        If this error is raised, it is because check_output could not execute the command.
+        Instead of raising the error, output a more specific error message
+        """
+        error = "subprocess.check_output could not execute command ' " + str(args) + " '"
+        print(error, file=sys.stderr)
+        return error
 
 def sample_dict(d, n=10, use_random=True):
      """

From c22c4185a11b3ecc4f3c12407ebaf440f11de650 Mon Sep 17 00:00:00 2001
From: kirit93 <kirit.thadaka@gmail.com>
Date: Fri, 24 Mar 2017 00:33:22 +0530
Subject: [PATCH 02/14] Imported __future__ print_function for python2.7
 compatibility

---
 gensim/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gensim/utils.py b/gensim/utils.py
index 271f7b1547..e4dd560e51 100644
--- a/gensim/utils.py
+++ b/gensim/utils.py
@@ -8,7 +8,7 @@
 This module contains various general utility functions.
 """
 
-from __future__ import with_statement
+from __future__ import with_statement, print_function
 
 import logging, warnings
 

From b379418277269ee8d4d7b865cb98b152c8ac4fe0 Mon Sep 17 00:00:00 2001
From: kirit93 <kirit.thadaka@gmail.com>
Date: Fri, 24 Mar 2017 09:33:29 +0530
Subject: [PATCH 03/14] Logging error instead of printing

---
 gensim/utils.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/gensim/utils.py b/gensim/utils.py
index e4dd560e51..b815b7aead 100644
--- a/gensim/utils.py
+++ b/gensim/utils.py
@@ -8,7 +8,7 @@
 This module contains various general utility functions.
 """
 
-from __future__ import with_statement, print_function
+from __future__ import with_statement
 
 import logging, warnings
 
@@ -1171,8 +1171,7 @@ def check_output(args, flag=True):
         If this error is raised, it is because check_output could not execute the command.
         Instead of raising the error, output a more specific error message
         """
-        error = "subprocess.check_output could not execute command ' " + str(args) + " '"
-        print(error, file=sys.stderr)
+        logger.error("subprocess.check_output could not execute command ' %s ' " % (str(args)))
         return error
 
 def sample_dict(d, n=10, use_random=True):

From b73e0f955e721498194f6c81453560ab4494559b Mon Sep 17 00:00:00 2001
From: kirit93 <kirit.thadaka@gmail.com>
Date: Fri, 24 Mar 2017 10:25:40 +0530
Subject: [PATCH 04/14] Corrected small error in check_output

---
 gensim/utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gensim/utils.py b/gensim/utils.py
index b815b7aead..85f4b718cd 100644
--- a/gensim/utils.py
+++ b/gensim/utils.py
@@ -1171,7 +1171,8 @@ def check_output(args, flag=True):
         If this error is raised, it is because check_output could not execute the command.
         Instead of raising the error, output a more specific error message
         """
-        logger.error("subprocess.check_output could not execute command ' %s ' " % (str(args)))
+        error = "subprocess.check_output could not execute command ' " + str(args) + " '"
+        logger.error(error)
         return error
 
 def sample_dict(d, n=10, use_random=True):

From dd5abb1718f89dbc5bd2f03e9474acd779feb82e Mon Sep 17 00:00:00 2001
From: Kirit Thadaka <kirit.thadaka@gmail.com>
Date: Wed, 19 Apr 2017 12:00:18 +0530
Subject: [PATCH 05/14] Reverting to previous error handling logic

---
 gensim/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gensim/utils.py b/gensim/utils.py
index 85f4b718cd..d93d88ed41 100644
--- a/gensim/utils.py
+++ b/gensim/utils.py
@@ -1173,7 +1173,7 @@ def check_output(args, flag=True):
         """
         error = "subprocess.check_output could not execute command ' " + str(args) + " '"
         logger.error(error)
-        return error
+        raise
 
 def sample_dict(d, n=10, use_random=True):
      """

From f2d3b91f65649f92bf92d5e5c1cbefdfb58d5c93 Mon Sep 17 00:00:00 2001
From: Kirit Thadaka <kirit.thadaka@gmail.com>
Date: Wed, 10 May 2017 21:58:58 +0530
Subject: [PATCH 06/14] Logging error message and added assertion for exception

---
 gensim/test/test_utils.py | 7 ++-----
 gensim/utils.py           | 5 ++---
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/gensim/test/test_utils.py b/gensim/test/test_utils.py
index eb58cfbaa7..b7c7d985a9 100644
--- a/gensim/test/test_utils.py
+++ b/gensim/test/test_utils.py
@@ -10,6 +10,7 @@
 
 import logging
 import unittest
+import subprocess
 
 from gensim import utils
 from six import iteritems
@@ -98,11 +99,7 @@ def test_check_output(self):
         self.assertEqual(res, b'hello\n')
 
     def test_check_output_exception(self):
-        error = utils.check_output(args=['ldfs'])
-        self.assertEqual(error, "subprocess.check_output could not execute command ' ldfs '")
-
-    def test_exception(self):
-        self.assertRaises(Exception, utils.check_output(args=['pythons']))
+        self.assertRaises(subprocess.CalledProcessError, lambda : utils.check_output(args=["ldfs"]))
 
 
 
diff --git a/gensim/utils.py b/gensim/utils.py
index d93d88ed41..a0c75fc66c 100644
--- a/gensim/utils.py
+++ b/gensim/utils.py
@@ -1166,13 +1166,12 @@ def check_output(args, flag=True):
     try:
         res = subprocess.check_output(args, shell=flag)
         return res
-    except subprocess.CalledProcessError:
+    except subprocess.CalledProcessError as e:
         """
         If this error is raised, it is because check_output could not execute the command.
         Instead of raising the error, output a more specific error message
         """
-        error = "subprocess.check_output could not execute command ' " + str(args) + " '"
-        logger.error(error)
+        logger.error(e)
         raise
 
 def sample_dict(d, n=10, use_random=True):

From 2c01e0fb07edd40ab9680bb87878b85e345fb39c Mon Sep 17 00:00:00 2001
From: Kirit Thadaka <kirit.thadaka@gmail.com>
Date: Wed, 10 May 2017 22:03:10 +0530
Subject: [PATCH 07/14] Minor documentation change

---
 gensim/utils.py | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/gensim/utils.py b/gensim/utils.py
index a0c75fc66c..f8c5dd8147 100644
--- a/gensim/utils.py
+++ b/gensim/utils.py
@@ -1149,18 +1149,12 @@ def keep_vocab_item(word, count, min_count, trim_rule=None):
 def check_output(args, flag=True):
     r"""
     subprocess.check_output with the flag set to true will spawn a new shell process and execute 'args'
-    if there is an error while executing args, the error message will be displayed on stdout along with the
-    custom error message in the except block. This allows the user to receive an accurate error message if subprocess
-    fails to execute the specified command.
+    if there is an error while executing args, the error message will be logged. 
+    This allows the user to receive an accurate error message if subprocess fails to execute the specified command.
     If flag is set to true, subprocess.check_output takes 'args' as a string instead of a list. To abstract the user from this,
     this function will convert the argument list to a string if needed.
-    >>> test_checkoutput(args=['/usr/bin/python', '--version'])
-    Python 2.7.10
-    In case args generates an error
-    >>> test_checkoutput(args=['/usr/bin/pythons', '-ve']) #Incorrect argument
-    /bin/sh: /usr/bin/pythons: command not found
-    subprocess.check_output could not execute command ' /usr/bin/pythons -ve '
     """
+
     if flag:
         args = " ".join(args)
     try:

From 040576d1d70ca6af84c5a9d3cdfb21d7c4e69ea2 Mon Sep 17 00:00:00 2001
From: Kirit Thadaka <kirit.thadaka@gmail.com>
Date: Wed, 10 May 2017 22:21:01 +0530
Subject: [PATCH 08/14] Removed trailing whitespace

---
 gensim/utils.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gensim/utils.py b/gensim/utils.py
index f8c5dd8147..083d71eaa6 100644
--- a/gensim/utils.py
+++ b/gensim/utils.py
@@ -1149,12 +1149,11 @@ def keep_vocab_item(word, count, min_count, trim_rule=None):
 def check_output(args, flag=True):
     r"""
     subprocess.check_output with the flag set to true will spawn a new shell process and execute 'args'
-    if there is an error while executing args, the error message will be logged. 
+    if there is an error while executing args, the error message will be logged.
     This allows the user to receive an accurate error message if subprocess fails to execute the specified command.
     If flag is set to true, subprocess.check_output takes 'args' as a string instead of a list. To abstract the user from this,
     this function will convert the argument list to a string if needed.
     """
-
     if flag:
         args = " ".join(args)
     try:

From 874ca71c516def30c3953854cb1cc37a1339bc17 Mon Sep 17 00:00:00 2001
From: Kirit Thadaka <kirit.thadaka@gmail.com>
Date: Wed, 10 May 2017 22:41:50 +0530
Subject: [PATCH 09/14] Resolved more whitespace issues

---
 gensim/test/test_utils.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/gensim/test/test_utils.py b/gensim/test/test_utils.py
index b7c7d985a9..bef7a6abce 100644
--- a/gensim/test/test_utils.py
+++ b/gensim/test/test_utils.py
@@ -92,15 +92,14 @@ def test_sample_dict(self):
         sampled_dict_random = utils.sample_dict(d,2)
         if sampled_dict_random in expected_dict_random:
             self.assertTrue(True)
-
+            
 class TestCheckOutput(unittest.TestCase):
     def test_check_output(self):
         res = utils.check_output(args=["echo", "hello"])
         self.assertEqual(res, b'hello\n')
-
+    
     def test_check_output_exception(self):
-        self.assertRaises(subprocess.CalledProcessError, lambda : utils.check_output(args=["ldfs"]))
-
+        self.assertRaises(subprocess.CalledProcessError, lambda:utils.check_output(args=["ldfs"]))
 
 
 if __name__ == '__main__':

From b9f1bd1303275c4967ea9d4a59f0fb393c579012 Mon Sep 17 00:00:00 2001
From: Kirit Thadaka <kirit.thadaka@gmail.com>
Date: Wed, 10 May 2017 23:02:30 +0530
Subject: [PATCH 10/14] More whitespace errors resolved

---
 gensim/test/test_utils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gensim/test/test_utils.py b/gensim/test/test_utils.py
index bef7a6abce..1628ebeaf6 100644
--- a/gensim/test/test_utils.py
+++ b/gensim/test/test_utils.py
@@ -92,14 +92,14 @@ def test_sample_dict(self):
         sampled_dict_random = utils.sample_dict(d,2)
         if sampled_dict_random in expected_dict_random:
             self.assertTrue(True)
-            
+
 class TestCheckOutput(unittest.TestCase):
     def test_check_output(self):
         res = utils.check_output(args=["echo", "hello"])
         self.assertEqual(res, b'hello\n')
-    
+
     def test_check_output_exception(self):
-        self.assertRaises(subprocess.CalledProcessError, lambda:utils.check_output(args=["ldfs"]))
+        self.assertRaises(subprocess.CalledProcessError, lambda: utils.check_output(args=["ldfs"]))
 
 
 if __name__ == '__main__':

From 8255994580ef1422d0362b04db27f02638106af2 Mon Sep 17 00:00:00 2001
From: Kirit Thadaka <kirit.thadaka@gmail.com>
Date: Wed, 10 May 2017 23:25:53 +0530
Subject: [PATCH 11/14] Conforms to pep8

---
 gensim/test/test_utils.py |  4 +++-
 gensim/utils.py           | 18 ++++++++++++------
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/gensim/test/test_utils.py b/gensim/test/test_utils.py
index 1628ebeaf6..23578fa744 100644
--- a/gensim/test/test_utils.py
+++ b/gensim/test/test_utils.py
@@ -93,13 +93,15 @@ def test_sample_dict(self):
         if sampled_dict_random in expected_dict_random:
             self.assertTrue(True)
 
+
 class TestCheckOutput(unittest.TestCase):
     def test_check_output(self):
         res = utils.check_output(args=["echo", "hello"])
         self.assertEqual(res, b'hello\n')
 
     def test_check_output_exception(self):
-        self.assertRaises(subprocess.CalledProcessError, lambda: utils.check_output(args=["ldfs"]))
+        self.assertRaises(subprocess.CalledProcessError,
+                          lambda: utils.check_output(args=["ldfs"]))
 
 
 if __name__ == '__main__':
diff --git a/gensim/utils.py b/gensim/utils.py
index 083d71eaa6..e65f7cf71a 100644
--- a/gensim/utils.py
+++ b/gensim/utils.py
@@ -1146,12 +1146,16 @@ def keep_vocab_item(word, count, min_count, trim_rule=None):
         else:
             return default_res
 
+
 def check_output(args, flag=True):
     r"""
-    subprocess.check_output with the flag set to true will spawn a new shell process and execute 'args'
-    if there is an error while executing args, the error message will be logged.
-    This allows the user to receive an accurate error message if subprocess fails to execute the specified command.
-    If flag is set to true, subprocess.check_output takes 'args' as a string instead of a list. To abstract the user from this,
+    subprocess.check_output with the flag set to true will spawn a new
+    shell process and execute 'args' if there is an error while executing
+    args, the error message will be logged. This allows the user to
+    receive an accurate error message if subprocess fails to execute the
+    specified command.
+    If flag is set to true, subprocess.check_output takes 'args' as a string
+    instead of a list. To abstract the user from this,
     this function will convert the argument list to a string if needed.
     """
     if flag:
@@ -1161,12 +1165,14 @@ def check_output(args, flag=True):
         return res
     except subprocess.CalledProcessError as e:
         """
-        If this error is raised, it is because check_output could not execute the command.
-        Instead of raising the error, output a more specific error message
+        If this error is raised, it is because check_output could not execute
+        the command. Instead of raising the error, output a more specific error
+        message
         """
         logger.error(e)
         raise
 
+
 def sample_dict(d, n=10, use_random=True):
      """
      Pick `n` items from dictionary `d` and return them as a list.

From 85762b4ef9b28c1c81842f540e6162187757f703 Mon Sep 17 00:00:00 2001
From: Kirit Thadaka <kirit.thadaka@gmail.com>
Date: Tue, 30 May 2017 11:48:12 +0530
Subject: [PATCH 12/14] Removed r from docstring

---
 gensim/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gensim/utils.py b/gensim/utils.py
index e65f7cf71a..2bd396f1d0 100644
--- a/gensim/utils.py
+++ b/gensim/utils.py
@@ -1148,7 +1148,7 @@ def keep_vocab_item(word, count, min_count, trim_rule=None):
 
 
 def check_output(args, flag=True):
-    r"""
+    """
     subprocess.check_output with the flag set to true will spawn a new
     shell process and execute 'args' if there is an error while executing
     args, the error message will be logged. This allows the user to

From 720dd27eb175fd43a41154952aded713fca3e198 Mon Sep 17 00:00:00 2001
From: Kirit Thadaka <kirit.thadaka@gmail.com>
Date: Fri, 9 Jun 2017 22:01:24 +0530
Subject: [PATCH 13/14] Changing check output interface to work with calls to
 old implementation

---
 gensim/utils.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/gensim/utils.py b/gensim/utils.py
index 8ef90867c4..6afd4a4468 100644
--- a/gensim/utils.py
+++ b/gensim/utils.py
@@ -1154,7 +1154,8 @@ def keep_vocab_item(word, count, min_count, trim_rule=None):
             return default_res
 
 
-def check_output(args, flag=True):
+
+def check_output(**kwargs):
     """
     subprocess.check_output with the flag set to true will spawn a new
     shell process and execute 'args' if there is an error while executing
@@ -1165,10 +1166,10 @@ def check_output(args, flag=True):
     instead of a list. To abstract the user from this,
     this function will convert the argument list to a string if needed.
     """
-    if flag:
-        args = " ".join(args)
+    args = kwargs.get("args")
+    args = " ".join(args)
     try:
-        res = subprocess.check_output(args, shell=flag)
+        res = subprocess.check_output(args, shell=True)
         return res
     except subprocess.CalledProcessError as e:
         """
@@ -1176,8 +1177,12 @@ def check_output(args, flag=True):
         the command. Instead of raising the error, output a more specific error
         message
         """
+        logger.debug("Error cause due to argument passed to check_output - %s", args)
         logger.error(e)
         raise
+    except KeyboardInterrupt:
+        raise
+
 
 
 def sample_dict(d, n=10, use_random=True):

From 0d4e8c995f6e4afe15a24d36e6bcbdfe686211ff Mon Sep 17 00:00:00 2001
From: Kirit Thadaka <kirit.thadaka@gmail.com>
Date: Fri, 9 Jun 2017 22:29:34 +0530
Subject: [PATCH 14/14] Added flake8 fixes

---
 gensim/utils.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/gensim/utils.py b/gensim/utils.py
index 6afd4a4468..8120d44620 100644
--- a/gensim/utils.py
+++ b/gensim/utils.py
@@ -1154,7 +1154,6 @@ def keep_vocab_item(word, count, min_count, trim_rule=None):
             return default_res
 
 
-
 def check_output(**kwargs):
     """
     subprocess.check_output with the flag set to true will spawn a new
@@ -1177,14 +1176,14 @@ def check_output(**kwargs):
         the command. Instead of raising the error, output a more specific error
         message
         """
-        logger.debug("Error cause due to argument passed to check_output - %s", args)
+        logger.debug("Error cause due to argument passed to check_output - %s",
+                     args)
         logger.error(e)
         raise
     except KeyboardInterrupt:
         raise
 
 
-
 def sample_dict(d, n=10, use_random=True):
     """
     Pick `n` items from dictionary `d` and return them as a list.