From e14871949f9b5c3bd065edbb88aab87799219967 Mon Sep 17 00:00:00 2001 From: parulsethi Date: Sat, 27 May 2017 16:12:20 +0530 Subject: [PATCH 01/19] added dockerfile --- docker/Dockerfile | 131 +++++++++++++++++++++++++++++++ docker/check_fast_version.py | 12 +++ docker/docker-compose.yml | 7 ++ docker/start_jupyter_notebook.sh | 7 ++ docker/wordrank_install.sh | 19 +++++ 5 files changed, 176 insertions(+) create mode 100644 docker/Dockerfile create mode 100644 docker/check_fast_version.py create mode 100644 docker/docker-compose.yml create mode 100644 docker/start_jupyter_notebook.sh create mode 100644 docker/wordrank_install.sh diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 0000000000..92804c2757 --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,131 @@ +FROM ubuntu:16.04 + +MAINTAINER Daniel Baptista Dias + +ENV GENSIM_REPOSITORY https://github.com/RaRe-Technologies/gensim/archive +ENV GENSIM_VERSION master + +# Installs python, pip and setup tools (with fixed versions) +RUN apt-get update \ + && apt-get install -y \ + ant=1.9.6-1ubuntu1 \ + cmake=3.5.1-1ubuntu3 \ + default-jdk=2:1.8-56ubuntu2 \ + g++=4:5.3.1-1ubuntu1 \ + git=1:2.7.4-0ubuntu1 \ + libboost-all-dev=1.58.0.1ubuntu1 \ + libgsl-dev=2.1+dfsg-2 \ + mercurial=3.7.3-1ubuntu1 \ + python3=3.5.1-3 \ + python3-pip=8.1.1-2ubuntu0.4 \ + python3-setuptools=20.7.0-1 \ + unzip=6.0-20ubuntu1 \ + wget=1.17.1-1ubuntu1.1 + libboost-program-options-dev \ + libboost-python-dev \ + zlib1g-dev + +# Setup python language +RUN locale-gen en_US.UTF-8 +ENV LANG en_US.UTF-8 +ENV LC_CTYPE en_US.UTF-8 +ENV LC_ALL en_US.UTF-8 + +# Make some symlinks +RUN cd /usr/bin \ + && ln -s easy_install3 easy_install \ + && ln -s pip3 pip + +# Upgrade pip +RUN pip install --upgrade pip + +# Install dependencies +RUN pip install \ + annoy \ + cython \ + jupyter \ + matplotlib \ + nltk \ + pandas \ + pyemd \ + sklearn \ + fasttext \ + testfixtures \ + unittest2 \ + git+git://github.com/mila-udem/fuel.git@stable \ + git+git://github.com/mila-udem/blocks.git@stable \ + -r https://raw.githubusercontent.com/mila-udem/blocks/stable/requirements.txt + +# Create gensim directory and dependencies directory +RUN mkdir /gensim \ + && mkdir /gensim_dependencies + +# Download gensim from Github +RUN mkdir /gensim/download \ + && cd /gensim/download \ + && wget --quiet $GENSIM_REPOSITORY/$GENSIM_VERSION.zip \ + && unzip $GENSIM_VERSION.zip \ + && mv ./gensim-$GENSIM_VERSION/* /gensim \ + && rm -rf /gensim/download \ + && cd /gensim \ + && python setup.py install + +# Set ENV variables for wrappers +ENV FT_HOME gensim_dependencies/fastText +ENV WR_HOME gensim_dependencies/wordrank +ENV MALLET_HOME gensim_dependencies/mallet +ENV DTM_PATH gensim_dependencies/dtm/bin/dtm-linux64 +ENV VOWPAL_WABBIT_PATH gensim_dependencies/vowpal_wabbit/vowpalwabbit/vw + +# Install custom dependencies + +# Install WordRank +RUN cd /gensim_dependencies \ + && git clone https://bitbucket.org/shihaoji/wordrank \ + && cp /gensim/docker/wordrank_install.sh /gensim_dependencies/wordrank/install.sh \ + && cd /gensim_dependencies/wordrank \ + && sh ./install.sh + +# Install fastText +RUN cd /gensim_dependencies \ + && git clone https://github.com/facebookresearch/fastText.git \ + && cd /gensim_dependencies/fastText \ + && make + +# Install MorphologicalPriorsForWordEmbeddings +RUN cd /gensim_dependencies \ + && git clone https://github.com/rguthrie3/MorphologicalPriorsForWordEmbeddings.git + +# Install DTM +RUN cd /gensim_dependencies \ + && git clone https://github.com/magsilva/dtm.git + +# Install Mallet +RUN mkdir /gensim_dependencies/mallet \ + && mkdir /gensim_dependencies/download \ + && cd /gensim_dependencies/download \ + && wget --quiet http://mallet.cs.umass.edu/dist/mallet-2.0.8.zip \ + && unzip mallet-2.0.8.zip \ + && mv ./mallet-2.0.8/* /gensim_dependencies/mallet \ + && rm -rf /gensim_dependencies/download \ + && cd /gensim_dependencies/mallet \ + && ant + +# Install Vowpal wabbit +RUN cd /gensim_dependencies \ + && git clone https://github.com/JohnLangford/vowpal_wabbit.git + && cd /gensim_dependencies/vowpal_wabbit \ + && make + && make install + +# Start gensim + +# Run check script +RUN python /gensim/docker/check_fast_version.py + +# Add running permission to startup script +RUN chmod +x /gensim/docker/start_jupyter_notebook.sh + +# Define the starting command for this container and expose its running port +CMD sh -c '/gensim/docker/start_jupyter_notebook.sh 9000' +EXPOSE 9000 \ No newline at end of file diff --git a/docker/check_fast_version.py b/docker/check_fast_version.py new file mode 100644 index 0000000000..b389eba8ae --- /dev/null +++ b/docker/check_fast_version.py @@ -0,0 +1,12 @@ +import sys + +try: + from gensim.models.word2vec_inner import train_batch_sg, train_batch_cbow + from gensim.models.word2vec_inner import score_sentence_sg, score_sentence_cbow + from gensim.models.word2vec_inner import FAST_VERSION, MAX_WORDS_IN_BATCH + + print('FAST_VERSION ok ! Retrieved with value ', FAST_VERSION) + sys.exit() +except ImportError: + print('Failed... fall back to plain numpy (20-80x slower training than the above)') + sys.exit(-1) diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml new file mode 100644 index 0000000000..84f5f2b712 --- /dev/null +++ b/docker/docker-compose.yml @@ -0,0 +1,7 @@ +version: '2' + +services: + gensim: + build: . + ports: + - 9000:9000 \ No newline at end of file diff --git a/docker/start_jupyter_notebook.sh b/docker/start_jupyter_notebook.sh new file mode 100644 index 0000000000..4c5946d056 --- /dev/null +++ b/docker/start_jupyter_notebook.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +PORT=$1 +NOTEBOOK_DIR=/gensim/docs/notebooks +DEFAULT_URL=/notebooks/gensim%20Quick%20Start.ipynb + +jupyter notebook --no-browser --ip=* --port=$PORT --allow-root --notebook-dir=$NOTEBOOK_DIR --NotebookApp.token=\"\" --NotebookApp.default_url=$DEFAULT_URL \ No newline at end of file diff --git a/docker/wordrank_install.sh b/docker/wordrank_install.sh new file mode 100644 index 0000000000..36a13e3007 --- /dev/null +++ b/docker/wordrank_install.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +printf "1. clean up workspace\n" +./clean.sh + +printf "\n2. install glove to construct cooccurrence matrix\n" +wget http://nlp.stanford.edu/software/GloVe-1.0.tar.gz # if failed, check http://nlp.stanford.edu/projects/glove/ for the original version +tar -xvzf GloVe-1.0.tar.gz; rm GloVe-1.0.tar.gz +patch -p0 -i glove.patch +cd glove; make clean all; cd .. + +printf "\n3. install hyperwords for evaluation\n" +hg clone -r 56 https://bitbucket.org/omerlevy/hyperwords +patch -p0 -i hyperwords.patch + +printf "\n4. build wordrank\n" +export CC=gcc CXX=g++ # uncomment this line if you don't have an Intel compiler, but with gcc all #pragma simd are ignored as of now +cmake . +make clean all \ No newline at end of file From b5e27b5f2e693525d0754d27bfb4372c075fa0fa Mon Sep 17 00:00:00 2001 From: parulsethi Date: Sat, 27 May 2017 16:56:35 +0530 Subject: [PATCH 02/19] remove fasttext from pip installs --- docker/Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 92804c2757..b5c90c70c5 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -49,7 +49,6 @@ RUN pip install \ pandas \ pyemd \ sklearn \ - fasttext \ testfixtures \ unittest2 \ git+git://github.com/mila-udem/fuel.git@stable \ From e653d3f88a8de31245c94afb64ab209c2a9230b9 Mon Sep 17 00:00:00 2001 From: parulsethi Date: Sat, 27 May 2017 17:08:10 +0530 Subject: [PATCH 03/19] remove syntax errors --- docker/Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index b5c90c70c5..fe7f64b3d3 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -20,7 +20,7 @@ RUN apt-get update \ python3-pip=8.1.1-2ubuntu0.4 \ python3-setuptools=20.7.0-1 \ unzip=6.0-20ubuntu1 \ - wget=1.17.1-1ubuntu1.1 + wget=1.17.1-1ubuntu1.1 \ libboost-program-options-dev \ libboost-python-dev \ zlib1g-dev @@ -112,9 +112,9 @@ RUN mkdir /gensim_dependencies/mallet \ # Install Vowpal wabbit RUN cd /gensim_dependencies \ - && git clone https://github.com/JohnLangford/vowpal_wabbit.git + && git clone https://github.com/JohnLangford/vowpal_wabbit.git \ && cd /gensim_dependencies/vowpal_wabbit \ - && make + && make \ && make install # Start gensim From a40b1dfbf2e930f0f580f678b238baae55ed712b Mon Sep 17 00:00:00 2001 From: parulsethi Date: Sat, 27 May 2017 23:07:16 +0530 Subject: [PATCH 04/19] remove unused imports --- docker/check_fast_version.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/docker/check_fast_version.py b/docker/check_fast_version.py index b389eba8ae..f988f49433 100644 --- a/docker/check_fast_version.py +++ b/docker/check_fast_version.py @@ -1,8 +1,6 @@ import sys try: - from gensim.models.word2vec_inner import train_batch_sg, train_batch_cbow - from gensim.models.word2vec_inner import score_sentence_sg, score_sentence_cbow from gensim.models.word2vec_inner import FAST_VERSION, MAX_WORDS_IN_BATCH print('FAST_VERSION ok ! Retrieved with value ', FAST_VERSION) From 330b9ad214a21c7d5c021faf4e985b852f7fd8a4 Mon Sep 17 00:00:00 2001 From: parulsethi Date: Mon, 12 Jun 2017 17:35:25 +0530 Subject: [PATCH 05/19] modified dockerfile --- docker/Dockerfile | 45 +++++++++++++++++++----------------- docker/check_fast_version.py | 2 +- 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index fe7f64b3d3..0c9cd60ef7 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -2,8 +2,9 @@ FROM ubuntu:16.04 MAINTAINER Daniel Baptista Dias -ENV GENSIM_REPOSITORY https://github.com/RaRe-Technologies/gensim/archive -ENV GENSIM_VERSION master +ENV GENSIM_REPOSITORY https://github.com/parulsethi/gensim/archive +ENV GENSIM_BRANCH gensim_docker +ENV PYTHONPATH "/usr/local/lib/python3/dist-packages:/usr/local/lib/python3.5/site-packages:/usr/lib/python3/dist-packages" # Installs python, pip and setup tools (with fixed versions) RUN apt-get update \ @@ -21,6 +22,7 @@ RUN apt-get update \ python3-setuptools=20.7.0-1 \ unzip=6.0-20ubuntu1 \ wget=1.17.1-1ubuntu1.1 \ + libopenblas-dev \ libboost-program-options-dev \ libboost-python-dev \ zlib1g-dev @@ -41,18 +43,19 @@ RUN pip install --upgrade pip # Install dependencies RUN pip install \ - annoy \ - cython \ - jupyter \ - matplotlib \ - nltk \ - pandas \ - pyemd \ - sklearn \ + annoy==1.8.3 \ + cython==0.25.2 \ + jupyter==1.0.0 \ + matplotlib==2.0.0 \ + morfessor==2.0.2a4 \ + nltk==3.2.2 \ + pandas==0.19.2 \ + pyemd==0.4.3 \ + scikit-learn==0.18.1 \ testfixtures \ unittest2 \ - git+git://github.com/mila-udem/fuel.git@stable \ - git+git://github.com/mila-udem/blocks.git@stable \ + git+https://github.com/mila-udem/fuel.git@stable \ + git+https://github.com/mila-udem/blocks.git@stable \ -r https://raw.githubusercontent.com/mila-udem/blocks/stable/requirements.txt # Create gensim directory and dependencies directory @@ -62,9 +65,9 @@ RUN mkdir /gensim \ # Download gensim from Github RUN mkdir /gensim/download \ && cd /gensim/download \ - && wget --quiet $GENSIM_REPOSITORY/$GENSIM_VERSION.zip \ - && unzip $GENSIM_VERSION.zip \ - && mv ./gensim-$GENSIM_VERSION/* /gensim \ + && wget --quiet $GENSIM_REPOSITORY/$GENSIM_BRANCH.zip \ + && unzip $GENSIM_BRANCH.zip \ + && mv ./gensim-$GENSIM_BRANCH/* /gensim \ && rm -rf /gensim/download \ && cd /gensim \ && python setup.py install @@ -78,6 +81,12 @@ ENV VOWPAL_WABBIT_PATH gensim_dependencies/vowpal_wabbit/vowpalwabbit/vw # Install custom dependencies +# Install fastText +RUN cd /gensim_dependencies \ + && git clone https://github.com/facebookresearch/fastText.git \ + && cd /gensim_dependencies/fastText \ + && make + # Install WordRank RUN cd /gensim_dependencies \ && git clone https://bitbucket.org/shihaoji/wordrank \ @@ -85,12 +94,6 @@ RUN cd /gensim_dependencies \ && cd /gensim_dependencies/wordrank \ && sh ./install.sh -# Install fastText -RUN cd /gensim_dependencies \ - && git clone https://github.com/facebookresearch/fastText.git \ - && cd /gensim_dependencies/fastText \ - && make - # Install MorphologicalPriorsForWordEmbeddings RUN cd /gensim_dependencies \ && git clone https://github.com/rguthrie3/MorphologicalPriorsForWordEmbeddings.git diff --git a/docker/check_fast_version.py b/docker/check_fast_version.py index f988f49433..958c59852d 100644 --- a/docker/check_fast_version.py +++ b/docker/check_fast_version.py @@ -1,7 +1,7 @@ import sys try: - from gensim.models.word2vec_inner import FAST_VERSION, MAX_WORDS_IN_BATCH + from gensim.models.word2vec_inner import FAST_VERSION print('FAST_VERSION ok ! Retrieved with value ', FAST_VERSION) sys.exit() From 800aae69cc8754d9f1f91abaef89c3825e1cde52 Mon Sep 17 00:00:00 2001 From: parulsethi Date: Tue, 13 Jun 2017 22:05:30 +0530 Subject: [PATCH 06/19] add subversion, locales --- docker/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 0c9cd60ef7..ffffcd430f 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -4,7 +4,6 @@ MAINTAINER Daniel Baptista Dias ENV GENSIM_REPOSITORY https://github.com/parulsethi/gensim/archive ENV GENSIM_BRANCH gensim_docker -ENV PYTHONPATH "/usr/local/lib/python3/dist-packages:/usr/local/lib/python3.5/site-packages:/usr/lib/python3/dist-packages" # Installs python, pip and setup tools (with fixed versions) RUN apt-get update \ @@ -22,6 +21,8 @@ RUN apt-get update \ python3-setuptools=20.7.0-1 \ unzip=6.0-20ubuntu1 \ wget=1.17.1-1ubuntu1.1 \ + subversion \ + locales \ libopenblas-dev \ libboost-program-options-dev \ libboost-python-dev \ From 498ae7900610e68d742a843b865255b0fa7879fd Mon Sep 17 00:00:00 2001 From: parulsethi Date: Mon, 19 Jun 2017 16:16:29 +0530 Subject: [PATCH 07/19] use both python2 and python3 --- docker/Dockerfile | 76 ++++++++++++++++++++++++++--------------------- 1 file changed, 42 insertions(+), 34 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index ffffcd430f..9ea7f23b94 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -19,13 +19,15 @@ RUN apt-get update \ python3=3.5.1-3 \ python3-pip=8.1.1-2ubuntu0.4 \ python3-setuptools=20.7.0-1 \ + python \ + python-pip \ + python-setuptools \ unzip=6.0-20ubuntu1 \ wget=1.17.1-1ubuntu1.1 \ subversion \ locales \ libopenblas-dev \ libboost-program-options-dev \ - libboost-python-dev \ zlib1g-dev # Setup python language @@ -34,34 +36,34 @@ ENV LANG en_US.UTF-8 ENV LC_CTYPE en_US.UTF-8 ENV LC_ALL en_US.UTF-8 -# Make some symlinks -RUN cd /usr/bin \ - && ln -s easy_install3 easy_install \ - && ln -s pip3 pip - # Upgrade pip -RUN pip install --upgrade pip +RUN pip2 install --upgrade pip +RUN pip3 install --upgrade pip # Install dependencies -RUN pip install \ - annoy==1.8.3 \ +RUN pip2 install \ + cython==0.25.2 \ + jupyter==1.0.0 \ + matplotlib==2.0.0 \ + nltk==3.2.2 \ + pandas==0.19.2 \ + git+https://github.com/mila-udem/fuel.git@stable \ + git+https://github.com/mila-udem/blocks.git@stable \ + -r https://raw.githubusercontent.com/mila-udem/blocks/stable/requirements.txt + +RUN pip3 install \ cython==0.25.2 \ jupyter==1.0.0 \ matplotlib==2.0.0 \ - morfessor==2.0.2a4 \ nltk==3.2.2 \ pandas==0.19.2 \ - pyemd==0.4.3 \ - scikit-learn==0.18.1 \ - testfixtures \ - unittest2 \ git+https://github.com/mila-udem/fuel.git@stable \ git+https://github.com/mila-udem/blocks.git@stable \ -r https://raw.githubusercontent.com/mila-udem/blocks/stable/requirements.txt # Create gensim directory and dependencies directory RUN mkdir /gensim \ - && mkdir /gensim_dependencies + && mkdir /gensim/gensim_dependencies # Download gensim from Github RUN mkdir /gensim/download \ @@ -71,60 +73,66 @@ RUN mkdir /gensim/download \ && mv ./gensim-$GENSIM_BRANCH/* /gensim \ && rm -rf /gensim/download \ && cd /gensim \ - && python setup.py install + && pip2 install .[test] \ + && pip3 install .[test] \ + && python2 setup.py install \ + && python3 setup.py install # Set ENV variables for wrappers ENV FT_HOME gensim_dependencies/fastText ENV WR_HOME gensim_dependencies/wordrank ENV MALLET_HOME gensim_dependencies/mallet -ENV DTM_PATH gensim_dependencies/dtm/bin/dtm-linux64 +ENV DTM_PATH gensim_dependencies/dtm/dtm/main ENV VOWPAL_WABBIT_PATH gensim_dependencies/vowpal_wabbit/vowpalwabbit/vw # Install custom dependencies # Install fastText -RUN cd /gensim_dependencies \ +RUN cd /gensim/gensim_dependencies \ && git clone https://github.com/facebookresearch/fastText.git \ - && cd /gensim_dependencies/fastText \ + && cd /gensim/gensim_dependencies/fastText \ && make # Install WordRank -RUN cd /gensim_dependencies \ +RUN cd /gensim/gensim_dependencies \ && git clone https://bitbucket.org/shihaoji/wordrank \ - && cp /gensim/docker/wordrank_install.sh /gensim_dependencies/wordrank/install.sh \ - && cd /gensim_dependencies/wordrank \ + && cp /gensim/docker/wordrank_install.sh /gensim/gensim_dependencies/wordrank/install.sh \ + && cd /gensim/gensim_dependencies/wordrank \ && sh ./install.sh # Install MorphologicalPriorsForWordEmbeddings -RUN cd /gensim_dependencies \ +RUN cd /gensim/gensim_dependencies \ && git clone https://github.com/rguthrie3/MorphologicalPriorsForWordEmbeddings.git # Install DTM -RUN cd /gensim_dependencies \ - && git clone https://github.com/magsilva/dtm.git +RUN cd /gensim/gensim_dependencies \ + && git clone https://github.com/blei-lab/dtm.git \ + && cd /gensim/gensim_dependencies/dtm/dtm \ + && make # Install Mallet -RUN mkdir /gensim_dependencies/mallet \ - && mkdir /gensim_dependencies/download \ - && cd /gensim_dependencies/download \ +RUN mkdir /gensim/gensim_dependencies/mallet \ + && mkdir /gensim/gensim_dependencies/download \ + && cd /gensim/gensim_dependencies/download \ && wget --quiet http://mallet.cs.umass.edu/dist/mallet-2.0.8.zip \ && unzip mallet-2.0.8.zip \ - && mv ./mallet-2.0.8/* /gensim_dependencies/mallet \ - && rm -rf /gensim_dependencies/download \ - && cd /gensim_dependencies/mallet \ + && mv ./mallet-2.0.8/* /gensim/gensim_dependencies/mallet \ + && rm -rf /gensim/gensim_dependencies/download \ + && cd /gensim/gensim_dependencies/mallet \ && ant # Install Vowpal wabbit -RUN cd /gensim_dependencies \ +RUN cd /gensim/gensim_dependencies \ && git clone https://github.com/JohnLangford/vowpal_wabbit.git \ - && cd /gensim_dependencies/vowpal_wabbit \ + && cd /gensim/gensim_dependencies/vowpal_wabbit \ && make \ && make install # Start gensim # Run check script -RUN python /gensim/docker/check_fast_version.py +RUN python2 /gensim/docker/check_fast_version.py +RUN python3 /gensim/docker/check_fast_version.py # Add running permission to startup script RUN chmod +x /gensim/docker/start_jupyter_notebook.sh From 9afaca74eb73f001fe1b7ec21537850ce7bc247c Mon Sep 17 00:00:00 2001 From: parulsethi Date: Wed, 21 Jun 2017 14:20:54 +0530 Subject: [PATCH 08/19] upgrade numpy version --- docker/Dockerfile | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 9ea7f23b94..0b1f1d6139 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -47,7 +47,6 @@ RUN pip2 install \ matplotlib==2.0.0 \ nltk==3.2.2 \ pandas==0.19.2 \ - git+https://github.com/mila-udem/fuel.git@stable \ git+https://github.com/mila-udem/blocks.git@stable \ -r https://raw.githubusercontent.com/mila-udem/blocks/stable/requirements.txt @@ -57,10 +56,13 @@ RUN pip3 install \ matplotlib==2.0.0 \ nltk==3.2.2 \ pandas==0.19.2 \ - git+https://github.com/mila-udem/fuel.git@stable \ git+https://github.com/mila-udem/blocks.git@stable \ -r https://raw.githubusercontent.com/mila-udem/blocks/stable/requirements.txt +# avoid using old numpy version installed by blocks requirements +RUN pip2 install -U numpy +RUN pip3 install -U numpy + # Create gensim directory and dependencies directory RUN mkdir /gensim \ && mkdir /gensim/gensim_dependencies @@ -74,8 +76,8 @@ RUN mkdir /gensim/download \ && rm -rf /gensim/download \ && cd /gensim \ && pip2 install .[test] \ - && pip3 install .[test] \ && python2 setup.py install \ + && pip3 install .[test] \ && python3 setup.py install # Set ENV variables for wrappers @@ -139,4 +141,4 @@ RUN chmod +x /gensim/docker/start_jupyter_notebook.sh # Define the starting command for this container and expose its running port CMD sh -c '/gensim/docker/start_jupyter_notebook.sh 9000' -EXPOSE 9000 \ No newline at end of file +EXPOSE 9000 From 19ce3ca20d77dfe375b0d23353971fb6637e3319 Mon Sep 17 00:00:00 2001 From: parulsethi Date: Thu, 22 Jun 2017 21:15:08 +0530 Subject: [PATCH 09/19] add readme with relevant commands --- docker/README.md | 49 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 docker/README.md diff --git a/docker/README.md b/docker/README.md new file mode 100644 index 0000000000..e024e1ee52 --- /dev/null +++ b/docker/README.md @@ -0,0 +1,49 @@ +# Build gensim image + +In docker directory run the following command to build the image locally: + +``` +docker build -t gensim . +```` + +After that, you will need to search the gensim_image_id and tag the image using: + +``` +# check the image +docker image ls + +# generage the tag +docker tag [gensim_image_id] [my_user]/gensim:latest +``` + +Run the interactive bash mode: + +``` +docker run -it [my_user]/gensim /bin/bash +``` + +Run gensim tests in the interactive bash mode: + +``` +# run tests with desired python version +cd gensim +python2 setup.py test +``` + +To push the image to docker hub: + +``` +# login to docker hub +docker login + +# push image to docker hub +docker push [my_user]/gensim +``` + +# Run gensim image from anywhere + +Just execute: + +``` +docker run -p 9000:9000 [my_user]/gensim +``` \ No newline at end of file From ee3c4cfb9a5fa0f35a64ed6b2342face14b8faf3 Mon Sep 17 00:00:00 2001 From: parulsethi Date: Sun, 25 Jun 2017 23:03:38 +0530 Subject: [PATCH 10/19] add fixed versions for wrapper dependencies --- docker/Dockerfile | 46 ++++++++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 0b1f1d6139..05c9778c0d 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -2,8 +2,8 @@ FROM ubuntu:16.04 MAINTAINER Daniel Baptista Dias -ENV GENSIM_REPOSITORY https://github.com/parulsethi/gensim/archive -ENV GENSIM_BRANCH gensim_docker +ENV GENSIM_REPOSITORY https://codeload.github.com/RaRe-Technologies/gensim/zip +ENV GENSIM_VERSION 2.2.0 # Installs python, pip and setup tools (with fixed versions) RUN apt-get update \ @@ -47,7 +47,7 @@ RUN pip2 install \ matplotlib==2.0.0 \ nltk==3.2.2 \ pandas==0.19.2 \ - git+https://github.com/mila-udem/blocks.git@stable \ + git+https://github.com/mila-udem/blocks.git@7beb788f1fcfc78d56c59a5edf9b4e8d98f8d7d9 \ -r https://raw.githubusercontent.com/mila-udem/blocks/stable/requirements.txt RUN pip3 install \ @@ -56,7 +56,7 @@ RUN pip3 install \ matplotlib==2.0.0 \ nltk==3.2.2 \ pandas==0.19.2 \ - git+https://github.com/mila-udem/blocks.git@stable \ + git+https://github.com/mila-udem/blocks.git@7beb788f1fcfc78d56c59a5edf9b4e8d98f8d7d9 \ -r https://raw.githubusercontent.com/mila-udem/blocks/stable/requirements.txt # avoid using old numpy version installed by blocks requirements @@ -70,9 +70,9 @@ RUN mkdir /gensim \ # Download gensim from Github RUN mkdir /gensim/download \ && cd /gensim/download \ - && wget --quiet $GENSIM_REPOSITORY/$GENSIM_BRANCH.zip \ - && unzip $GENSIM_BRANCH.zip \ - && mv ./gensim-$GENSIM_BRANCH/* /gensim \ + && wget --quiet $GENSIM_REPOSITORY/$GENSIM_VERSION \ + && unzip $GENSIM_VERSION \ + && mv ./gensim-$GENSIM_VERSION/* /gensim \ && rm -rf /gensim/download \ && cd /gensim \ && pip2 install .[test] \ @@ -81,11 +81,19 @@ RUN mkdir /gensim/download \ && python3 setup.py install # Set ENV variables for wrappers -ENV FT_HOME gensim_dependencies/fastText -ENV WR_HOME gensim_dependencies/wordrank -ENV MALLET_HOME gensim_dependencies/mallet -ENV DTM_PATH gensim_dependencies/dtm/dtm/main -ENV VOWPAL_WABBIT_PATH gensim_dependencies/vowpal_wabbit/vowpalwabbit/vw +ENV FT_HOME ~/gensim/gensim_dependencies/fastText +ENV WR_HOME ~/gensim/gensim_dependencies/wordrank +ENV MALLET_HOME ~/gensim/gensim_dependencies/mallet +ENV DTM_PATH ~/gensim/gensim_dependencies/dtm/dtm/main +ENV VOWPAL_WABBIT_PATH ~/gensim/gensim_dependencies/vowpal_wabbit/vowpalwabbit/vw + +# For fixed version downloads of gensim wrappers dependencies +ENV FASTTEXT_VERSION f24a781021862f0e475a5fb9c55b7c1cec3b6e2e +ENV WORDRANK_VERSION 44f3f7786f76c79c083dfad9d64e20bacfb4a0b0 +ENV MORPHOLOGICALPRIORSFORWORDEMBEDDINGS_VERSION ec2e37a3bcb8bd7b56b75b043c47076bc5decf22 +ENV DTM_VERSION 67139e6f526b2bc33aef56dc36176a1b8b210056 +ENV MALLET_VERSION 2.0.8 +ENV VOWPAL_WABBIT_VERSION 9f61108b7ed53311024f884f6b55887c938391f9 # Install custom dependencies @@ -93,6 +101,7 @@ ENV VOWPAL_WABBIT_PATH gensim_dependencies/vowpal_wabbit/vowpalwabbit/vw RUN cd /gensim/gensim_dependencies \ && git clone https://github.com/facebookresearch/fastText.git \ && cd /gensim/gensim_dependencies/fastText \ + && git checkout $FASTTEXT_VERSION \ && make # Install WordRank @@ -100,25 +109,29 @@ RUN cd /gensim/gensim_dependencies \ && git clone https://bitbucket.org/shihaoji/wordrank \ && cp /gensim/docker/wordrank_install.sh /gensim/gensim_dependencies/wordrank/install.sh \ && cd /gensim/gensim_dependencies/wordrank \ + && git checkout $WORDRANK_VERSION \ && sh ./install.sh # Install MorphologicalPriorsForWordEmbeddings RUN cd /gensim/gensim_dependencies \ - && git clone https://github.com/rguthrie3/MorphologicalPriorsForWordEmbeddings.git + && git clone https://github.com/rguthrie3/MorphologicalPriorsForWordEmbeddings.git \ + && cd /gensim_dependencies/MorphologicalPriorsForWordEmbeddings \ + && git checkout $MORPHOLOGICALPRIORSFORWORDEMBEDDINGS_VERSION # Install DTM RUN cd /gensim/gensim_dependencies \ && git clone https://github.com/blei-lab/dtm.git \ && cd /gensim/gensim_dependencies/dtm/dtm \ + && git checkout $DTM_VERSION \ && make # Install Mallet RUN mkdir /gensim/gensim_dependencies/mallet \ && mkdir /gensim/gensim_dependencies/download \ && cd /gensim/gensim_dependencies/download \ - && wget --quiet http://mallet.cs.umass.edu/dist/mallet-2.0.8.zip \ - && unzip mallet-2.0.8.zip \ - && mv ./mallet-2.0.8/* /gensim/gensim_dependencies/mallet \ + && wget --quiet http://mallet.cs.umass.edu/dist/mallet-$MALLET_VERSION.zip \ + && unzip mallet-$MALLET_VERSION.zip \ + && mv ./mallet-$MALLET_VERSION/* /gensim/gensim_dependencies/mallet \ && rm -rf /gensim/gensim_dependencies/download \ && cd /gensim/gensim_dependencies/mallet \ && ant @@ -127,6 +140,7 @@ RUN mkdir /gensim/gensim_dependencies/mallet \ RUN cd /gensim/gensim_dependencies \ && git clone https://github.com/JohnLangford/vowpal_wabbit.git \ && cd /gensim/gensim_dependencies/vowpal_wabbit \ + && git checkout VOWPAL_WABBIT_VERSION \ && make \ && make install From 9fd041e36b996c7f74dff5f6bdfee20d9c4560cb Mon Sep 17 00:00:00 2001 From: parulsethi Date: Thu, 29 Jun 2017 17:18:58 +0530 Subject: [PATCH 11/19] made requested changes --- docker/Dockerfile | 18 +++++------------- docker/README.md | 12 +----------- docker/wordrank_install.sh | 19 ------------------- 3 files changed, 6 insertions(+), 43 deletions(-) delete mode 100644 docker/wordrank_install.sh diff --git a/docker/Dockerfile b/docker/Dockerfile index 05c9778c0d..fee0ba5285 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -82,21 +82,21 @@ RUN mkdir /gensim/download \ # Set ENV variables for wrappers ENV FT_HOME ~/gensim/gensim_dependencies/fastText -ENV WR_HOME ~/gensim/gensim_dependencies/wordrank ENV MALLET_HOME ~/gensim/gensim_dependencies/mallet ENV DTM_PATH ~/gensim/gensim_dependencies/dtm/dtm/main ENV VOWPAL_WABBIT_PATH ~/gensim/gensim_dependencies/vowpal_wabbit/vowpalwabbit/vw # For fixed version downloads of gensim wrappers dependencies ENV FASTTEXT_VERSION f24a781021862f0e475a5fb9c55b7c1cec3b6e2e -ENV WORDRANK_VERSION 44f3f7786f76c79c083dfad9d64e20bacfb4a0b0 ENV MORPHOLOGICALPRIORSFORWORDEMBEDDINGS_VERSION ec2e37a3bcb8bd7b56b75b043c47076bc5decf22 ENV DTM_VERSION 67139e6f526b2bc33aef56dc36176a1b8b210056 ENV MALLET_VERSION 2.0.8 -ENV VOWPAL_WABBIT_VERSION 9f61108b7ed53311024f884f6b55887c938391f9 +ENV VOWPAL_WABBIT_VERSION ba51ab08fa0c3b8d29cbb8cb87472b3ebd24cd55 # Install custom dependencies +# TODO: Install wordrank (need to install mpich/openmpi with multithreading enabled) + # Install fastText RUN cd /gensim/gensim_dependencies \ && git clone https://github.com/facebookresearch/fastText.git \ @@ -104,18 +104,10 @@ RUN cd /gensim/gensim_dependencies \ && git checkout $FASTTEXT_VERSION \ && make -# Install WordRank -RUN cd /gensim/gensim_dependencies \ - && git clone https://bitbucket.org/shihaoji/wordrank \ - && cp /gensim/docker/wordrank_install.sh /gensim/gensim_dependencies/wordrank/install.sh \ - && cd /gensim/gensim_dependencies/wordrank \ - && git checkout $WORDRANK_VERSION \ - && sh ./install.sh - # Install MorphologicalPriorsForWordEmbeddings RUN cd /gensim/gensim_dependencies \ && git clone https://github.com/rguthrie3/MorphologicalPriorsForWordEmbeddings.git \ - && cd /gensim_dependencies/MorphologicalPriorsForWordEmbeddings \ + && cd /gensim/gensim_dependencies/MorphologicalPriorsForWordEmbeddings \ && git checkout $MORPHOLOGICALPRIORSFORWORDEMBEDDINGS_VERSION # Install DTM @@ -140,7 +132,7 @@ RUN mkdir /gensim/gensim_dependencies/mallet \ RUN cd /gensim/gensim_dependencies \ && git clone https://github.com/JohnLangford/vowpal_wabbit.git \ && cd /gensim/gensim_dependencies/vowpal_wabbit \ - && git checkout VOWPAL_WABBIT_VERSION \ + && git checkout $VOWPAL_WABBIT_VERSION \ && make \ && make install diff --git a/docker/README.md b/docker/README.md index e024e1ee52..40cf8cd6ba 100644 --- a/docker/README.md +++ b/docker/README.md @@ -30,17 +30,7 @@ cd gensim python2 setup.py test ``` -To push the image to docker hub: - -``` -# login to docker hub -docker login - -# push image to docker hub -docker push [my_user]/gensim -``` - -# Run gensim image from anywhere +# Run ipython notebook with installed gensim Just execute: diff --git a/docker/wordrank_install.sh b/docker/wordrank_install.sh deleted file mode 100644 index 36a13e3007..0000000000 --- a/docker/wordrank_install.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash - -printf "1. clean up workspace\n" -./clean.sh - -printf "\n2. install glove to construct cooccurrence matrix\n" -wget http://nlp.stanford.edu/software/GloVe-1.0.tar.gz # if failed, check http://nlp.stanford.edu/projects/glove/ for the original version -tar -xvzf GloVe-1.0.tar.gz; rm GloVe-1.0.tar.gz -patch -p0 -i glove.patch -cd glove; make clean all; cd .. - -printf "\n3. install hyperwords for evaluation\n" -hg clone -r 56 https://bitbucket.org/omerlevy/hyperwords -patch -p0 -i hyperwords.patch - -printf "\n4. build wordrank\n" -export CC=gcc CXX=g++ # uncomment this line if you don't have an Intel compiler, but with gcc all #pragma simd are ignored as of now -cmake . -make clean all \ No newline at end of file From 1cc3398e6cb3fd3ed5ecf9937a01fb4195286cce Mon Sep 17 00:00:00 2001 From: Menshikh Ivan Date: Thu, 29 Jun 2017 20:44:15 +0500 Subject: [PATCH 12/19] update readme --- docker/README.md | 28 +++++----------------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/docker/README.md b/docker/README.md index 40cf8cd6ba..c577413a87 100644 --- a/docker/README.md +++ b/docker/README.md @@ -4,36 +4,18 @@ In docker directory run the following command to build the image locally: ``` docker build -t gensim . -```` - -After that, you will need to search the gensim_image_id and tag the image using: - ``` -# check the image -docker image ls -# generage the tag -docker tag [gensim_image_id] [my_user]/gensim:latest -``` +# Run ipython notebook with installed gensim -Run the interactive bash mode: +Just execute: ``` -docker run -it [my_user]/gensim /bin/bash +docker run -p 9000:9000 gensim ``` -Run gensim tests in the interactive bash mode: +# Run the interactive bash mode ``` -# run tests with desired python version -cd gensim -python2 setup.py test -``` - -# Run ipython notebook with installed gensim - -Just execute: - +docker run -it gensim /bin/bash ``` -docker run -p 9000:9000 [my_user]/gensim -``` \ No newline at end of file From 4825a47395c10b62e6cad804314159c70f9b26f2 Mon Sep 17 00:00:00 2001 From: parulsethi Date: Thu, 29 Jun 2017 21:34:41 +0530 Subject: [PATCH 13/19] change vw pin and remove docker-yml --- docker/Dockerfile | 24 ++++++++++++------------ docker/docker-compose.yml | 7 ------- 2 files changed, 12 insertions(+), 19 deletions(-) delete mode 100644 docker/docker-compose.yml diff --git a/docker/Dockerfile b/docker/Dockerfile index fee0ba5285..2e196e6396 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -2,8 +2,8 @@ FROM ubuntu:16.04 MAINTAINER Daniel Baptista Dias -ENV GENSIM_REPOSITORY https://codeload.github.com/RaRe-Technologies/gensim/zip -ENV GENSIM_VERSION 2.2.0 +ENV GENSIM_REPOSITORY https://github.com/parulsethi/gensim/archive +ENV GENSIM_VERSION gensim_docker # Installs python, pip and setup tools (with fixed versions) RUN apt-get update \ @@ -19,16 +19,16 @@ RUN apt-get update \ python3=3.5.1-3 \ python3-pip=8.1.1-2ubuntu0.4 \ python3-setuptools=20.7.0-1 \ - python \ - python-pip \ - python-setuptools \ + python=2.7.11-1 \ + python-pip=8.1.1-2ubuntu0.4 \ + python-setuptools=20.7.0-1 \ unzip=6.0-20ubuntu1 \ wget=1.17.1-1ubuntu1.1 \ - subversion \ - locales \ - libopenblas-dev \ - libboost-program-options-dev \ - zlib1g-dev + subversion=1.9.3-2ubuntu1 \ + locales=2.23-0ubuntu9 \ + libopenblas-dev=0.2.18-1ubuntu1 \ + libboost-program-options-dev=1.58.0.1ubuntu1 \ + zlib1g-dev=1:1.2.8.dfsg-2ubuntu4.1 # Setup python language RUN locale-gen en_US.UTF-8 @@ -70,8 +70,8 @@ RUN mkdir /gensim \ # Download gensim from Github RUN mkdir /gensim/download \ && cd /gensim/download \ - && wget --quiet $GENSIM_REPOSITORY/$GENSIM_VERSION \ - && unzip $GENSIM_VERSION \ + && wget --quiet $GENSIM_REPOSITORY/$GENSIM_VERSION.zip \ + && unzip $GENSIM_VERSION.zip \ && mv ./gensim-$GENSIM_VERSION/* /gensim \ && rm -rf /gensim/download \ && cd /gensim \ diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml deleted file mode 100644 index 84f5f2b712..0000000000 --- a/docker/docker-compose.yml +++ /dev/null @@ -1,7 +0,0 @@ -version: '2' - -services: - gensim: - build: . - ports: - - 9000:9000 \ No newline at end of file From d0f09bb43616aca8be8e75c7495a9902d115ee5c Mon Sep 17 00:00:00 2001 From: parulsethi Date: Fri, 30 Jun 2017 12:48:31 +0530 Subject: [PATCH 14/19] change vw version and make absolute paths for wrappers --- docker/Dockerfile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 2e196e6396..7b193b94af 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -81,17 +81,17 @@ RUN mkdir /gensim/download \ && python3 setup.py install # Set ENV variables for wrappers -ENV FT_HOME ~/gensim/gensim_dependencies/fastText -ENV MALLET_HOME ~/gensim/gensim_dependencies/mallet -ENV DTM_PATH ~/gensim/gensim_dependencies/dtm/dtm/main -ENV VOWPAL_WABBIT_PATH ~/gensim/gensim_dependencies/vowpal_wabbit/vowpalwabbit/vw +ENV FT_HOME /gensim/gensim_dependencies/fastText +ENV MALLET_HOME /gensim/gensim_dependencies/mallet +ENV DTM_PATH /gensim/gensim_dependencies/dtm/dtm/main +ENV VOWPAL_WABBIT_PATH /gensim/gensim_dependencies/vowpal_wabbit/vowpalwabbit/vw # For fixed version downloads of gensim wrappers dependencies ENV FASTTEXT_VERSION f24a781021862f0e475a5fb9c55b7c1cec3b6e2e ENV MORPHOLOGICALPRIORSFORWORDEMBEDDINGS_VERSION ec2e37a3bcb8bd7b56b75b043c47076bc5decf22 ENV DTM_VERSION 67139e6f526b2bc33aef56dc36176a1b8b210056 ENV MALLET_VERSION 2.0.8 -ENV VOWPAL_WABBIT_VERSION ba51ab08fa0c3b8d29cbb8cb87472b3ebd24cd55 +ENV VOWPAL_WABBIT_VERSION 69ecc2847fa0c876c6e0557af409f386f0ced59a # Install custom dependencies From 36a5025b9862af872e6a673be4c582761f01893c Mon Sep 17 00:00:00 2001 From: parulsethi Date: Fri, 30 Jun 2017 14:45:57 +0530 Subject: [PATCH 15/19] specify original gensim repo for download --- docker/Dockerfile | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 7b193b94af..b82dd40990 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -2,8 +2,9 @@ FROM ubuntu:16.04 MAINTAINER Daniel Baptista Dias -ENV GENSIM_REPOSITORY https://github.com/parulsethi/gensim/archive -ENV GENSIM_VERSION gensim_docker +ENV GENSIM_REPOSITORY https://github.com/RaRe-Technologies/gensim/archive +ENV GENSIM_VERSION 2.2.0 +ENV GENSIM_BRANCH master # Installs python, pip and setup tools (with fixed versions) RUN apt-get update \ @@ -70,11 +71,12 @@ RUN mkdir /gensim \ # Download gensim from Github RUN mkdir /gensim/download \ && cd /gensim/download \ - && wget --quiet $GENSIM_REPOSITORY/$GENSIM_VERSION.zip \ - && unzip $GENSIM_VERSION.zip \ - && mv ./gensim-$GENSIM_VERSION/* /gensim \ + && wget --quiet $GENSIM_REPOSITORY/$GENSIM_BRANCH.zip \ + && unzip $GENSIM_BRANCH.zip \ + && mv ./gensim-$GENSIM_BRANCH/* /gensim \ && rm -rf /gensim/download \ && cd /gensim \ + && git checkout tags/GENSIM_VERSION && pip2 install .[test] \ && python2 setup.py install \ && pip3 install .[test] \ From 2db28f4e309b37bee13a7a7edf12114bfcf959f9 Mon Sep 17 00:00:00 2001 From: parulsethi Date: Fri, 30 Jun 2017 14:52:50 +0530 Subject: [PATCH 16/19] change maintainer --- docker/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index b82dd40990..be0631599f 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,6 +1,6 @@ FROM ubuntu:16.04 -MAINTAINER Daniel Baptista Dias +MAINTAINER Parul Sethi ENV GENSIM_REPOSITORY https://github.com/RaRe-Technologies/gensim/archive ENV GENSIM_VERSION 2.2.0 @@ -76,7 +76,7 @@ RUN mkdir /gensim/download \ && mv ./gensim-$GENSIM_BRANCH/* /gensim \ && rm -rf /gensim/download \ && cd /gensim \ - && git checkout tags/GENSIM_VERSION + && git checkout tags/$GENSIM_VERSION && pip2 install .[test] \ && python2 setup.py install \ && pip3 install .[test] \ From 25865a9df6147a6215c19abfd8df138e6961c3ce Mon Sep 17 00:00:00 2001 From: parulsethi Date: Fri, 30 Jun 2017 14:56:12 +0530 Subject: [PATCH 17/19] correct missing slash --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index be0631599f..1543630b5b 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -76,7 +76,7 @@ RUN mkdir /gensim/download \ && mv ./gensim-$GENSIM_BRANCH/* /gensim \ && rm -rf /gensim/download \ && cd /gensim \ - && git checkout tags/$GENSIM_VERSION + && git checkout tags/$GENSIM_VERSION \ && pip2 install .[test] \ && python2 setup.py install \ && pip3 install .[test] \ From 1add68de716280150e00747ce37ed44806d633de Mon Sep 17 00:00:00 2001 From: parulsethi Date: Fri, 30 Jun 2017 15:32:16 +0530 Subject: [PATCH 18/19] use git clone for gensim --- docker/Dockerfile | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 1543630b5b..41e347146a 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -2,9 +2,8 @@ FROM ubuntu:16.04 MAINTAINER Parul Sethi -ENV GENSIM_REPOSITORY https://github.com/RaRe-Technologies/gensim/archive +ENV GENSIM_REPOSITORY https://github.com/RaRe-Technologies/gensim.git ENV GENSIM_VERSION 2.2.0 -ENV GENSIM_BRANCH master # Installs python, pip and setup tools (with fixed versions) RUN apt-get update \ @@ -69,12 +68,7 @@ RUN mkdir /gensim \ && mkdir /gensim/gensim_dependencies # Download gensim from Github -RUN mkdir /gensim/download \ - && cd /gensim/download \ - && wget --quiet $GENSIM_REPOSITORY/$GENSIM_BRANCH.zip \ - && unzip $GENSIM_BRANCH.zip \ - && mv ./gensim-$GENSIM_BRANCH/* /gensim \ - && rm -rf /gensim/download \ +RUN git clone $GENSIM_REPOSITORY \ && cd /gensim \ && git checkout tags/$GENSIM_VERSION \ && pip2 install .[test] \ From ce4656d0be7d37d573ca4f54b722fbddffff2d3f Mon Sep 17 00:00:00 2001 From: parulsethi Date: Fri, 30 Jun 2017 16:07:43 +0530 Subject: [PATCH 19/19] correct gensim folder sequences --- docker/Dockerfile | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 41e347146a..46f9944a68 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -63,10 +63,6 @@ RUN pip3 install \ RUN pip2 install -U numpy RUN pip3 install -U numpy -# Create gensim directory and dependencies directory -RUN mkdir /gensim \ - && mkdir /gensim/gensim_dependencies - # Download gensim from Github RUN git clone $GENSIM_REPOSITORY \ && cd /gensim \ @@ -76,6 +72,9 @@ RUN git clone $GENSIM_REPOSITORY \ && pip3 install .[test] \ && python3 setup.py install +# Create gensim dependencies directory +RUN mkdir /gensim/gensim_dependencies + # Set ENV variables for wrappers ENV FT_HOME /gensim/gensim_dependencies/fastText ENV MALLET_HOME /gensim/gensim_dependencies/mallet