Skip to content

Commit

Permalink
Merge pull request #642 from ExplodingCabbage/specify-data-path
Browse files Browse the repository at this point in the history
Let --data-path be specified when running download.py scripts
  • Loading branch information
ines authored Nov 23, 2016
2 parents ede2bab + fbe1968 commit a7b5fba
Show file tree
Hide file tree
Showing 6 changed files with 54 additions and 12 deletions.
16 changes: 16 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,22 @@ and ``--model`` are optional and enable additional tests:
python -m pytest <spacy-directory> --vectors --model --slow
Download model to custom location
=================================

You can use the ``--data-path`` (or ``-d``) argument to specify the directory into which
``spacy.en.download`` and ``spacy.de.download`` download the language model:

.. code:: bash
python -m spacy.en.download all --data-path /some/dir
If you choose to download to a custom location, you will need to tell spaCy where to load the model
from in order to use it. You can do this either by calling ``spacy.util.set_data_path()`` before
calling ``spacy.load()``, or by passing a ``path`` argument to the ``spacy.en.English`` or
``spacy.de.German`` constructors.

Changelog
=========

Expand Down
5 changes: 3 additions & 2 deletions spacy/de/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@

@plac.annotations(
force=("Force overwrite", "flag", "f", bool),
data_path=("Path to download model", "option", "d", str)
)
def main(data_size='all', force=False):
download('de', force)
def main(data_size='all', force=False, data_path=None):
download('de', force=force, data_path=data_path)


if __name__ == '__main__':
Expand Down
18 changes: 13 additions & 5 deletions spacy/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,19 @@
from . import util


def download(lang, force=False, fail_on_exist=True):
def download(lang, force=False, fail_on_exist=True, data_path=None):
if not data_path:
data_path = util.get_data_path()

# spaCy uses pathlib, and util.get_data_path returns a pathlib.Path object,
# but sputnik (which we're using below) doesn't use pathlib and requires
# its data_path parameters to be strings, so we coerce the data_path to a
# str here.
data_path = str(data_path)

try:
pkg = sputnik.package(about.__title__, about.__version__,
about.__models__.get(lang, lang))
about.__models__.get(lang, lang), data_path)
if force:
shutil.rmtree(pkg.path)
elif fail_on_exist:
Expand All @@ -24,15 +33,14 @@ def download(lang, force=False, fail_on_exist=True):
pass

package = sputnik.install(about.__title__, about.__version__,
about.__models__.get(lang, lang))
about.__models__.get(lang, lang), data_path)

try:
sputnik.package(about.__title__, about.__version__,
about.__models__.get(lang, lang))
about.__models__.get(lang, lang), data_path)
except (PackageNotFoundException, CompatiblePackageNotFoundException):
print("Model failed to install. Please run 'python -m "
"spacy.%s.download --force'." % lang, file=sys.stderr)
sys.exit(1)

data_path = util.get_data_path()
print("Model successfully installed to %s" % data_path, file=sys.stderr)
7 changes: 4 additions & 3 deletions spacy/en/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,18 @@

@plac.annotations(
force=("Force overwrite", "flag", "f", bool),
data_path=("Path to download model", "option", "d", str)
)
def main(data_size='all', force=False):
def main(data_size='all', force=False, data_path=None):
if force:
sputnik.purge(about.__title__, about.__version__)

if data_size in ('all', 'parser'):
print("Downloading parsing model")
download('en', False)
download('en', force=False, data_path=data_path)
if data_size in ('all', 'glove'):
print("Downloading GloVe vectors")
download('en_glove_cc_300_1m_vectors', False)
download('en_glove_cc_300_1m_vectors', force=False, data_path=data_path)


if __name__ == '__main__':
Expand Down
3 changes: 1 addition & 2 deletions spacy/language.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,9 +246,8 @@ def train(cls, path, gold_tuples, *configs):
self.end_training()

def __init__(self, path=True, **overrides):
if 'data_dir' in overrides and 'path' not in overrides:
if 'data_dir' in overrides and path is True:
raise ValueError("The argument 'data_dir' has been renamed to 'path'")
path = overrides.get('path', True)
if isinstance(path, basestring):
path = pathlib.Path(path)
if path is True:
Expand Down
17 changes: 17 additions & 0 deletions website/docs/usage/index.jade
Original file line number Diff line number Diff line change
Expand Up @@ -114,3 +114,20 @@ p
python -m pip install -U pytest

python -m pytest &lt;spacy-directory&gt; --vectors --model --slow

+h(2, "custom-location") Download model to custom location

p
| Use the #[code --data-path] (or #[code -d]) argument to specify the
| directory into which #[code spacy.en.download] and
| #[code spacy.de.download] download the language model:

+code(false, "bash").
python -m spacy.en.download all --data-path /some/dir

p
| If you choose to download to a custom location, you will need to tell
| spaCy where to load the model from in order to use it. You can do this
| either by calling #[code spacy.util.set_data_path()] before calling
| #[code spacy.load()], or by passing a #[code path] argument to the
| #[code spacy.en.English] or #[code spacy.de.German] constructors.

0 comments on commit a7b5fba

Please sign in to comment.