From 5b5b54592a6aaf3c0f40f162351d11d3b64c9b98 Mon Sep 17 00:00:00 2001 From: Ivan Menshikh Date: Mon, 20 Apr 2020 09:23:42 +0300 Subject: [PATCH] Add osx+py38 case to avoid multiprocessing issue (#2800) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add osx+py38 case to avoid multiprocessing issue * add comment, fix warning * extend comment Co-Authored-By: Radim Řehůřek * Update gensim/utils.py * Update gensim/utils.py Co-Authored-By: Michael Penkov Co-authored-by: Radim Řehůřek Co-authored-by: Michael Penkov --- gensim/utils.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/gensim/utils.py b/gensim/utils.py index 70b12d8a88..90c9279338 100644 --- a/gensim/utils.py +++ b/gensim/utils.py @@ -1238,7 +1238,11 @@ def run(self): self.q.put(wrapped_chunk.pop(), block=True) -if os.name == 'nt': +# Multiprocessing on Windows (and on OSX with python3.8+) uses "spawn" mode, which +# causes issues with pickling. +# So for these two platforms, use simpler serial processing in `chunkize`. +# See https://github.com/RaRe-Technologies/gensim/pull/2800#discussion_r410890171 +if os.name == 'nt' or (sys.platform == "darwin" and sys.version_info >= (3, 8)): def chunkize(corpus, chunksize, maxsize=0, as_numpy=False): """Split `corpus` into fixed-sized chunks, using :func:`~gensim.utils.chunkize_serial`. @@ -1260,7 +1264,8 @@ def chunkize(corpus, chunksize, maxsize=0, as_numpy=False): """ if maxsize > 0: - warnings.warn("detected Windows; aliasing chunkize to chunkize_serial") + entity = "Windows" if os.name == 'nt' else "OSX with python3.8+" + warnings.warn("detected %s; aliasing chunkize to chunkize_serial" % entity) for chunk in chunkize_serial(corpus, chunksize, as_numpy=as_numpy): yield chunk else: