From 82c48c0aa6b667b2481a5536cf94410d3ee8d67e Mon Sep 17 00:00:00 2001 From: Shubham Agarwal Date: Tue, 4 Feb 2020 15:14:47 +0000 Subject: [PATCH 1/7] 1. Shell and python file to only download image chat data. 2. Updated readme --- parlai/tasks/image_chat/README.md | 1 + parlai/tasks/image_chat/download_data.py | 26 ++++++++++++++++++++++++ parlai/tasks/image_chat/download_data.sh | 19 +++++++++++++++++ 3 files changed, 46 insertions(+) create mode 100644 parlai/tasks/image_chat/download_data.py create mode 100755 parlai/tasks/image_chat/download_data.sh diff --git a/parlai/tasks/image_chat/README.md b/parlai/tasks/image_chat/README.md index 15abebb0eb2..6dec5042348 100644 --- a/parlai/tasks/image_chat/README.md +++ b/parlai/tasks/image_chat/README.md @@ -6,3 +6,4 @@ Tags: #Image_Chat, #All, #Visual, #ChitChat Notes: If you have already downloaded the images, please specify with the `--yfcc-path` flag, as the image download script takes a very long time to run +If you only want to download the data, run `./parlai/tasks/image_chat/download_data.sh` from the root of your ParlAI clone. To save the data somewhere else, edit the `$DATA_DIR` variable in that script; it defaults to `/tmp`. The script is a thin wrapper around `parlai/tasks/image_chat/download_data.py`. diff --git a/parlai/tasks/image_chat/download_data.py b/parlai/tasks/image_chat/download_data.py new file mode 100644 index 00000000000..386f17e6183 --- /dev/null +++ b/parlai/tasks/image_chat/download_data.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python3 + +from parlai.tasks.image_chat.build import build +import argparse + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument( + "-dp", "--datapath", default="/tmp", + help="Path where to save data." + ) + + args = parser.parse_args() + # opts is dic in parlai + args = vars(args) + + return args + + +if __name__ == "__main__": + opt = parse_args() + # Only datapath is required by build. 
+ # Using build function to check the version and + # internal hash + build(opt) diff --git a/parlai/tasks/image_chat/download_data.sh b/parlai/tasks/image_chat/download_data.sh new file mode 100755 index 00000000000..0090f3afcd7 --- /dev/null +++ b/parlai/tasks/image_chat/download_data.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +export CURRENT_DIR=${PWD} +# Should ideally give the parlai clone path +echo "Running code from: " $CURRENT_DIR + +#export TASK_DIR="$(dirname "$CURRENT_DIR")" +#export PARLAI_CODE_DIR="$(dirname "$TASK_DIR")" +#export PROJECT_DIR="$(dirname "$PARLAI_CODE_DIR")" +# Going to the project directory +#cd $PROJECT_DIR + +export DATA_DIR=/tmp/ +mkdir -p $DATA_DIR + +echo "Downloading in data root: " $DATA_DIR + +PYTHONPATH=. python parlai/tasks/image_chat/download_data.py \ +-dp $DATA_DIR From c15b618109b872c948b6c1227452a73d3a5edd1b Mon Sep 17 00:00:00 2001 From: Shubham Agarwal Date: Tue, 4 Feb 2020 17:15:39 +0000 Subject: [PATCH 2/7] Adding Copyright --- parlai/tasks/image_chat/download_data.py | 3 +++ parlai/tasks/image_chat/download_data.sh | 9 +-------- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/parlai/tasks/image_chat/download_data.py b/parlai/tasks/image_chat/download_data.py index 386f17e6183..cd4e6eca12d 100644 --- a/parlai/tasks/image_chat/download_data.py +++ b/parlai/tasks/image_chat/download_data.py @@ -1,4 +1,7 @@ #!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
from parlai.tasks.image_chat.build import build import argparse diff --git a/parlai/tasks/image_chat/download_data.sh b/parlai/tasks/image_chat/download_data.sh index 0090f3afcd7..248e7e73daa 100755 --- a/parlai/tasks/image_chat/download_data.sh +++ b/parlai/tasks/image_chat/download_data.sh @@ -1,18 +1,11 @@ #!/usr/bin/env bash export CURRENT_DIR=${PWD} -# Should ideally give the parlai clone path +# Should return the ParlAI clone path echo "Running code from: " $CURRENT_DIR -#export TASK_DIR="$(dirname "$CURRENT_DIR")" -#export PARLAI_CODE_DIR="$(dirname "$TASK_DIR")" -#export PROJECT_DIR="$(dirname "$PARLAI_CODE_DIR")" -# Going to the project directory -#cd $PROJECT_DIR - export DATA_DIR=/tmp/ mkdir -p $DATA_DIR - echo "Downloading in data root: " $DATA_DIR PYTHONPATH=. python parlai/tasks/image_chat/download_data.py \ From bba43ab3f3a7e9519a15f5bdbf7375cbf6b24b24 Mon Sep 17 00:00:00 2001 From: Shubham Agarwal Date: Tue, 4 Feb 2020 17:53:04 +0000 Subject: [PATCH 3/7] incorporating pylint suggestions --- parlai/tasks/image_chat/download_data.py | 6 +++++- parlai/tasks/image_chat/download_data.sh | 3 +++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/parlai/tasks/image_chat/download_data.py b/parlai/tasks/image_chat/download_data.py index cd4e6eca12d..7efa02a8b01 100644 --- a/parlai/tasks/image_chat/download_data.py +++ b/parlai/tasks/image_chat/download_data.py @@ -3,11 +3,15 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. 
-from parlai.tasks.image_chat.build import build import argparse +from parlai.tasks.image_chat.build import build def parse_args(): + """ + Wrapper to parse CLI arguments + :return: args + """ parser = argparse.ArgumentParser() parser.add_argument( "-dp", "--datapath", default="/tmp", diff --git a/parlai/tasks/image_chat/download_data.sh b/parlai/tasks/image_chat/download_data.sh index 248e7e73daa..edb1ae1779a 100755 --- a/parlai/tasks/image_chat/download_data.sh +++ b/parlai/tasks/image_chat/download_data.sh @@ -1,4 +1,7 @@ #!/usr/bin/env bash +# Copyright (c) Facebook, Inc. and its affiliates. +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. export CURRENT_DIR=${PWD} # Should return the ParlAI clone path From cd8a0e41c2ece71968c4cb8ea52bc95aee96f741 Mon Sep 17 00:00:00 2001 From: Shubham Agarwal Date: Tue, 4 Feb 2020 19:13:19 +0000 Subject: [PATCH 4/7] running autoformat.sh --- .circleci/config.yml | 4 ++-- .github/pull_request_template.md | 2 +- .github/workflows/lint.yml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 1fa46d08bb5..83145aa3bc1 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -65,7 +65,7 @@ installdeps: &installdeps pip3 install --progress-bar off codecov mkdir -p ~/ParlAI/test-results pip install -q -r requirements.txt - python setup.py develop + python download_data.py develop python -c "import nltk; nltk.download('punkt')" installtorchgpu14: &installtorchgpu14 @@ -429,7 +429,7 @@ jobs: working_directory: ~/ParlAI/ command: | pip install gitpython - python setup.py develop --no-deps + python download_data.py develop --no-deps python .circleci/triggers.py | while read job; do curl -s \ --data "build_parameters[CIRCLE_JOB]=${job}" \ diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 34025a4f470..4d4877788fe 100644 --- 
a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -16,4 +16,4 @@ Any other information or context you would like to provide. **Data tests (if applicable)** If you added a new teacher, you will be asked to run -`python setup.py test -s tests.suites.datatests`. Please paste this log here. +`python download_data.py test -s tests.suites.datatests`. Please paste this log here. diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 5b51270ca6a..814438f0b16 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -31,7 +31,7 @@ jobs: run: | set -eux pip install -q flake8 flake8-bugbear flake8-black docformatter==1.3.0 - python setup.py develop --no-deps # get our custom flake8 errors + python download_data.py develop --no-deps # get our custom flake8 errors python -c 'import parlai' flake8 --version bash autoformat.sh -c -f | tee ${GITHUB_WORKSPACE}/output-annotations.txt From d5df49de1d829a5c40acb7c3f6c48b5eeeb180f3 Mon Sep 17 00:00:00 2001 From: Shubham Agarwal Date: Wed, 5 Feb 2020 18:00:16 +0000 Subject: [PATCH 5/7] related to #2381. 
rolling back after autoformat --- .circleci/config.yml | 4 ++-- .github/pull_request_template.md | 2 +- .github/workflows/lint.yml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 83145aa3bc1..1fa46d08bb5 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -65,7 +65,7 @@ installdeps: &installdeps pip3 install --progress-bar off codecov mkdir -p ~/ParlAI/test-results pip install -q -r requirements.txt - python download_data.py develop + python setup.py develop python -c "import nltk; nltk.download('punkt')" installtorchgpu14: &installtorchgpu14 @@ -429,7 +429,7 @@ jobs: working_directory: ~/ParlAI/ command: | pip install gitpython - python download_data.py develop --no-deps + python setup.py develop --no-deps python .circleci/triggers.py | while read job; do curl -s \ --data "build_parameters[CIRCLE_JOB]=${job}" \ diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 4d4877788fe..34025a4f470 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -16,4 +16,4 @@ Any other information or context you would like to provide. **Data tests (if applicable)** If you added a new teacher, you will be asked to run -`python download_data.py test -s tests.suites.datatests`. Please paste this log here. +`python setup.py test -s tests.suites.datatests`. Please paste this log here. 
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 814438f0b16..5b51270ca6a 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -31,7 +31,7 @@ jobs: run: | set -eux pip install -q flake8 flake8-bugbear flake8-black docformatter==1.3.0 - python download_data.py develop --no-deps # get our custom flake8 errors + python setup.py develop --no-deps # get our custom flake8 errors python -c 'import parlai' flake8 --version bash autoformat.sh -c -f | tee ${GITHUB_WORKSPACE}/output-annotations.txt From b098e070010b0c8afe40bd06dc31107a1b82ea77 Mon Sep 17 00:00:00 2001 From: Shubham Agarwal Date: Thu, 6 Feb 2020 23:38:21 +0000 Subject: [PATCH 6/7] lint suggestions --- parlai/tasks/image_chat/download_data.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/parlai/tasks/image_chat/download_data.py b/parlai/tasks/image_chat/download_data.py index 7efa02a8b01..80b3d0ba938 100644 --- a/parlai/tasks/image_chat/download_data.py +++ b/parlai/tasks/image_chat/download_data.py @@ -9,13 +9,12 @@ def parse_args(): """ - Wrapper to parse CLI arguments + Wrapper to parse CLI arguments. :return: args """ parser = argparse.ArgumentParser() parser.add_argument( - "-dp", "--datapath", default="/tmp", - help="Path where to save data." + "-dp", "--datapath", default="/tmp", help="Path where to save data." ) args = parser.parse_args() From ea3490ef68a08e210c0fb3dd69ff1831cd8de8c6 Mon Sep 17 00:00:00 2001 From: Shubham Agarwal Date: Thu, 6 Feb 2020 23:39:25 +0000 Subject: [PATCH 7/7] lint suggestions --- parlai/tasks/image_chat/download_data.py | 1 + 1 file changed, 1 insertion(+) diff --git a/parlai/tasks/image_chat/download_data.py b/parlai/tasks/image_chat/download_data.py index 80b3d0ba938..c4c2e38ae7a 100644 --- a/parlai/tasks/image_chat/download_data.py +++ b/parlai/tasks/image_chat/download_data.py @@ -10,6 +10,7 @@ def parse_args(): """ Wrapper to parse CLI arguments. 
+ :return: args """ parser = argparse.ArgumentParser()