From c61700f39bdf7d461789f0b1a1f05da8d64586f4 Mon Sep 17 00:00:00 2001 From: Yang yaming Date: Fri, 3 Apr 2020 19:57:59 +0800 Subject: [PATCH 1/2] Add doc of TextNAS (#2260) --- docs/en_US/NAS/TextNAS.md | 80 +++++++++ docs/en_US/nas.rst | 1 + examples/nas/textnas/README.md | 6 +- examples/nas/textnas/arc/final_arc.json | 212 ++++++++++++++++++++++++ examples/nas/textnas/run_retrain.sh | 4 +- 5 files changed, 300 insertions(+), 3 deletions(-) create mode 100644 docs/en_US/NAS/TextNAS.md create mode 100644 examples/nas/textnas/arc/final_arc.json diff --git a/docs/en_US/NAS/TextNAS.md b/docs/en_US/NAS/TextNAS.md new file mode 100644 index 0000000000..7c455534ec --- /dev/null +++ b/docs/en_US/NAS/TextNAS.md @@ -0,0 +1,80 @@ +# TextNAS + +## Introduction + +This is the implementation of the TextNAS algorithm proposed in the paper [TextNAS: A Neural Architecture Search Space tailored for Text Representation](https://arxiv.org/pdf/1912.10729.pdf). TextNAS is a neural architecture search algorithm tailored for text representation. More specifically, TextNAS is based on a novel search space consisting of operators widely adopted to solve various NLP tasks, and TextNAS also supports multi-path ensemble within a single network to balance the width and depth of the architecture. + +The search space of TextNAS contains: + + * 1-D convolutional operator with filter size 1, 3, 5, 7 + * recurrent operator (bi-directional GRU) + * self-attention operator + * pooling operator (max/average) + +Following the ENAS algorithm, TextNAS also utilizes parameter sharing to accelerate the search speed and adopts a reinforcement-learning controller for the architecture sampling and generation. Please refer to the paper for more details of TextNAS. 
+ +## Preparation + +Prepare the word vectors and SST dataset, and organize them in the data directory as shown below: + +``` +textnas +├── data +│ ├── sst +│ │ └── trees +│ │ ├── dev.txt +│ │ ├── test.txt +│ │ └── train.txt +│ └── glove.840B.300d.txt +├── dataloader.py +├── model.py +├── ops.py +├── README.md +├── search.py +└── utils.py +``` + +The following links might be helpful for finding and downloading the corresponding datasets: + +* [GloVe: Global Vectors for Word Representation](https://nlp.stanford.edu/projects/glove/) + * [glove.840B.300d.txt](http://nlp.stanford.edu/data/glove.840B.300d.zip) +* [Recursive Deep Models for Semantic Compositionality Over a Sentiment Treebank](https://nlp.stanford.edu/sentiment/) + * [trainDevTestTrees_PTB.zip](https://nlp.stanford.edu/sentiment/trainDevTestTrees_PTB.zip) + +## Examples + +### Search Space + +[Example code](https://github.com/microsoft/nni/tree/master/examples/nas/textnas) + +```bash +# In case NNI code is not cloned. If the code is cloned already, ignore this line and enter the code folder. +git clone https://github.com/Microsoft/nni.git + +# search the best architecture +cd examples/nas/textnas + +# view more options for search +python3 search.py -h +``` + +After each search epoch, 10 sampled architectures will be tested directly. Their performances are expected to be 40% - 42% after 10 epochs. + +By default, 20 sampled architectures will be exported into the `checkpoints` directory for the next step. + +### Retrain + +```bash +# In case NNI code is not cloned. If the code is cloned already, ignore this line and enter the code folder. +git clone https://github.com/Microsoft/nni.git + +# enter the TextNAS example folder +cd examples/nas/textnas + +# default to retrain on sst-2 +sh run_retrain.sh +``` + +## Reference + +TextNAS directly uses EnasTrainer; please refer to [ENAS](./ENAS.md) for the trainer APIs. 
diff --git a/docs/en_US/nas.rst b/docs/en_US/nas.rst index 0a56caa742..f5a06c5c9a 100644 --- a/docs/en_US/nas.rst +++ b/docs/en_US/nas.rst @@ -26,5 +26,6 @@ For details, please refer to the following tutorials: SPOS CDARTS ProxylessNAS + TextNAS Customize a NAS Algorithm API Reference diff --git a/examples/nas/textnas/README.md b/examples/nas/textnas/README.md index fb261ad04d..f8ebe24afd 100644 --- a/examples/nas/textnas/README.md +++ b/examples/nas/textnas/README.md @@ -42,4 +42,8 @@ By default, 20 sampled architectures will be exported into `checkpoints` directo ## Retrain -Not ready. +``` +sh run_retrain.sh +``` + +By default, the script will retrain the architecture provided by the author on the SST-2 dataset. diff --git a/examples/nas/textnas/arc/final_arc.json b/examples/nas/textnas/arc/final_arc.json new file mode 100644 index 0000000000..c1e12c2d4b --- /dev/null +++ b/examples/nas/textnas/arc/final_arc.json @@ -0,0 +1,212 @@ +{ + "LayerChoice1": [ + false, false, false, false, false, true, false, false + ], + "InputChoice2": [ + true + ], + "LayerChoice3": [ + false, false, false, false, false, false, false, true + ], + "InputChoice4": [ + false + ], + "InputChoice5": [ + true, false + ], + "LayerChoice6": [ + false, false, false, true, false, false, false, false + ], + "InputChoice7": [ + false, false + ], + "InputChoice8": [ + false, false, true + ], + "LayerChoice9": [ + false, false, false, false, false, false, true, false + ], + "InputChoice10": [ + false, true, true + ], + "InputChoice11": [ + false, false, true, false + ], + "LayerChoice12": [ + false, true, false, false, false, false, false, false + ], + "InputChoice13": [ + false, true, false, false + ], + "InputChoice14": [ + false, false, false, false, true + ], + "LayerChoice15": [ + false, true, false, false, false, false, false, false + ], + "InputChoice16": [ + false, false, true, false, true + ], + "InputChoice17": [ + false, false, false, false, true + ], + "LayerChoice18": [ + true, 
false, false, false, false, false, false, false + ], + "InputChoice19": [ + false, false, true, true, true, true + ], + "InputChoice20": [ + true, false, false, false, false + ], + "LayerChoice21": [ + false, false, false, false, false, false, true, false + ], + "InputChoice22": [ + false, true, true, false, false, false, false + ], + "InputChoice23": [ + false, true, false, false, false + ], + "LayerChoice24": [ + false, false, false, false, false, true, false, false + ], + "InputChoice25": [ + false, true, false, true, true, false, true, true + ], + "InputChoice26": [ + false, false, true, false, false + ], + "LayerChoice27": [ + false, false, false, false, false, true, false, false + ], + "InputChoice28": [ + false, false, false, false, false, true, false, true, true + ], + "InputChoice29": [ + true, false, false, false, false + ], + "LayerChoice30": [ + false, false, false, false, false, false, false, true + ], + "InputChoice31": [ + true, true, false, false, true, false, false, true, true, false + ], + "InputChoice32": [ + true, false, false, false, false + ], + "LayerChoice33": [ + false, false, false, false, true, false, false, false + ], + "InputChoice34": [ + true, false, false, true, true, true, true, false, false, false, false + ], + "InputChoice35": [ + false, false, false, true, false + ], + "LayerChoice36": [ + false, true, false, false, false, false, false, false + ], + "InputChoice37": [ + true, true, false, true, false, true, false, false, true, false, false, false + ], + "InputChoice38": [ + false, false, false, true, false + ], + "LayerChoice39": [ + false, false, true, false, false, false, false, false + ], + "InputChoice40": [ + true, true, false, false, false, false, true, false, false, true, true, false, true + ], + "InputChoice41": [ + false, false, false, true, false + ], + "LayerChoice42": [ + true, false, false, false, false, false, false, false + ], + "InputChoice43": [ + false, false, true, false, false, false, true, true, true, false, 
true, true, false, false + ], + "InputChoice44": [ + false, false, false, false, true + ], + "LayerChoice45": [ + false, false, false, true, false, false, false, false + ], + "InputChoice46": [ + true, false, false, false, false, false, true, false, false, false, true, true, false, false, true + ], + "InputChoice47": [ + false, false, false, true, false + ], + "LayerChoice48": [ + false, false, true, false, false, false, false, false + ], + "InputChoice49": [ + false, false, false, false, false, false, false, false, false, true, true, false, true, false, true, false + ], + "InputChoice50": [ + false, false, false, false, true + ], + "LayerChoice51": [ + false, false, false, false, true, false, false, false + ], + "InputChoice52": [ + false, true, true, true, true, false, false, true, false, true, false, false, false, false, true, false, false + ], + "InputChoice53": [ + false, false, true, false, false + ], + "LayerChoice54": [ + false, false, false, true, false, false, false, false + ], + "InputChoice55": [ + false, false, false, false, false, true, false, false, false, false, false, false, false, true, true, true, false, true + ], + "InputChoice56": [ + false, false, true, false, false + ], + "LayerChoice57": [ + false, false, false, true, false, false, false, false + ], + "InputChoice58": [ + false, false, false, true, false, false, false, false, false, false, true, false, false, false, true, false, false, false, false + ], + "InputChoice59": [ + false, true, false, false, false + ], + "LayerChoice60": [ + false, false, false, false, false, true, false, false + ], + "InputChoice61": [ + true, true, false, false, false, false, false, false, false, false, true, true, false, false, true, true, true, true, false, false + ], + "InputChoice62": [ + true, false, false, false, false + ], + "LayerChoice63": [ + false, false, false, false, false, false, false, true + ], + "InputChoice64": [ + false, true, true, true, false, false, false, true, false, true, true, true, 
true, false, true, false, false, false, false, false, false + ], + "InputChoice65": [ + false, false, false, false, true + ], + "LayerChoice66": [ + false, false, false, false, false, false, false, true + ], + "InputChoice67": [ + false, false, true, true, true, true, false, true, false, true, true, false, false, false, false, true, false, false, false, false, false, true + ], + "InputChoice68": [ + false, false, false, true, false + ], + "LayerChoice69": [ + false, false, false, true, false, false, false, false + ], + "InputChoice70": [ + true, false, false, true, false, false, false, true, false, false, false, false, true, false, false, false, true, false, false, false, false, false, false + ] +} diff --git a/examples/nas/textnas/run_retrain.sh b/examples/nas/textnas/run_retrain.sh index 5c8ea66ae9..1f02121e31 100755 --- a/examples/nas/textnas/run_retrain.sh +++ b/examples/nas/textnas/run_retrain.sh @@ -4,7 +4,7 @@ export PYTHONPATH="$(pwd)" export CUDA_VISIBLE_DEVICES=0 -python -u retrain.py \ +python3 -u retrain.py \ --train_ratio=1.0 \ --valid_ratio=1.0 \ --min_count=1 \ @@ -36,6 +36,6 @@ python -u retrain.py \ --child_lr_T_0=10 \ --child_lr_T_mul=2 \ --multi_path=True \ - --child_fixed_arc="./checkpoints/architecture_00.json" \ + --child_fixed_arc="./arc/final_arc.json" \ --fixed_seed=True \ "$@" From d2c57770967fc14fe9d6172606b8c2e1903d4e3b Mon Sep 17 00:00:00 2001 From: RayMeng8 Date: Wed, 8 Apr 2020 02:18:57 +0800 Subject: [PATCH 2/2] Add supported data types for PBT tuner (#2271) --- .../mnist-pbt-tuner-pytorch/__init__.py | 0 .../trials/mnist-pbt-tuner-pytorch/mnist.py | 4 +- src/sdk/pynni/nni/pbt_tuner/pbt_tuner.py | 124 +++++++++++++++--- 3 files changed, 111 insertions(+), 17 deletions(-) delete mode 100644 examples/trials/mnist-pbt-tuner-pytorch/__init__.py diff --git a/examples/trials/mnist-pbt-tuner-pytorch/__init__.py b/examples/trials/mnist-pbt-tuner-pytorch/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git 
a/examples/trials/mnist-pbt-tuner-pytorch/mnist.py b/examples/trials/mnist-pbt-tuner-pytorch/mnist.py index 2161191e9e..b8653b40dc 100644 --- a/examples/trials/mnist-pbt-tuner-pytorch/mnist.py +++ b/examples/trials/mnist-pbt-tuner-pytorch/mnist.py @@ -155,8 +155,8 @@ def get_params(): help='learning rate (default: 0.01)') parser.add_argument('--momentum', type=float, default=0.5, metavar='M', help='SGD momentum (default: 0.5)') - parser.add_argument('--epochs', type=int, default=10, metavar='N', - help='number of epochs to train (default: 10)') + parser.add_argument('--epochs', type=int, default=1, metavar='N', + help='number of epochs to train (default: 1)') parser.add_argument('--seed', type=int, default=1, metavar='S', help='random seed (default: 1)') parser.add_argument('--no_cuda', action='store_true', default=False, diff --git a/src/sdk/pynni/nni/pbt_tuner/pbt_tuner.py b/src/sdk/pynni/nni/pbt_tuner/pbt_tuner.py index 9e4acd586c..e943752e84 100755 --- a/src/sdk/pynni/nni/pbt_tuner/pbt_tuner.py +++ b/src/sdk/pynni/nni/pbt_tuner/pbt_tuner.py @@ -4,9 +4,11 @@ import copy import logging import os +import random import numpy as np import nni +import nni.parameter_expressions from nni.tuner import Tuner from nni.utils import OptimizeMode, extract_scalar_reward, split_index, json2parameter, json2space @@ -14,7 +16,42 @@ logger = logging.getLogger('pbt_tuner_AutoML') -def exploit_and_explore(bot_trial_info, top_trial_info, factors, epoch, search_space): +def perturbation(hyperparameter_type, value, resample_probablity, uv, ub, lv, lb, random_state): + """ + Perturbation for hyperparameters + + Parameters + ---------- + hyperparameter_type : str + type of hyperparameter + value : list + parameters for sampling hyperparameter + resample_probability : float + probability for resampling + uv : float/int + upper value after perturbation + ub : float/int + upper bound + lv : float/int + lower value after perturbation + lb : float/int + lower bound + random_state : 
RandomState + random state + """ + if random.random() < resample_probablity: + if hyperparameter_type == "choice": + return value.index(nni.parameter_expressions.choice(value, random_state)) + else: + return getattr(nni.parameter_expressions, hyperparameter_type)(*(value + [random_state])) + else: + if random.random() > 0.5: + return min(uv, ub) + else: + return max(lv, lb) + + +def exploit_and_explore(bot_trial_info, top_trial_info, factor, resample_probability, epoch, search_space): """ Replace checkpoint of bot_trial with top, and perturb hyperparameters @@ -24,8 +61,10 @@ def exploit_and_explore(bot_trial_info, top_trial_info, factors, epoch, search_s bottom model whose parameters should be replaced top_trial_info : TrialInfo better model - factors : float - factors for perturbation + factor : float + factor for perturbation + resample_probability : float + probability for resampling epoch : int step of PBTTuner search_space : dict @@ -34,21 +73,72 @@ def exploit_and_explore(bot_trial_info, top_trial_info, factors, epoch, search_s bot_checkpoint_dir = bot_trial_info.checkpoint_dir top_hyper_parameters = top_trial_info.hyper_parameters hyper_parameters = copy.deepcopy(top_hyper_parameters) - # TODO think about different type of hyperparameters for 1.perturbation 2.within search space + random_state = np.random.RandomState() for key in hyper_parameters.keys(): + hyper_parameter = hyper_parameters[key] if key == 'load_checkpoint_dir': hyper_parameters[key] = hyper_parameters['save_checkpoint_dir'] + continue elif key == 'save_checkpoint_dir': hyper_parameters[key] = os.path.join(bot_checkpoint_dir, str(epoch)) - elif isinstance(hyper_parameters[key], float): - perturb = np.random.choice(factors) - val = hyper_parameters[key] * perturb + continue + elif search_space[key]["_type"] == "choice": + choices = search_space[key]["_value"] + ub, uv = len(choices) - 1, choices.index(hyper_parameter["_value"]) + 1 + lb, lv = 0, choices.index(hyper_parameter["_value"]) - 1 + 
elif search_space[key]["_type"] == "randint": lb, ub = search_space[key]["_value"][:2] - if search_space[key]["_type"] in ("uniform", "normal"): - val = np.clip(val, lb, ub).item() - hyper_parameters[key] = val + ub -= 1 + uv = hyper_parameter + 1 + lv = hyper_parameter - 1 + elif search_space[key]["_type"] == "uniform": + lb, ub = search_space[key]["_value"][:2] + perturb = (ub - lb) * factor + uv = hyper_parameter + perturb + lv = hyper_parameter - perturb + elif search_space[key]["_type"] == "quniform": + lb, ub, q = search_space[key]["_value"][:3] + multi = round(hyper_parameter / q) + uv = (multi + 1) * q + lv = (multi - 1) * q + elif search_space[key]["_type"] == "loguniform": + lb, ub = search_space[key]["_value"][:2] + perturb = (np.log(ub) - np.log(lb)) * factor + uv = np.exp(min(np.log(hyper_parameter) + perturb, np.log(ub))) + lv = np.exp(max(np.log(hyper_parameter) - perturb, np.log(lb))) + elif search_space[key]["_type"] == "qloguniform": + lb, ub, q = search_space[key]["_value"][:3] + multi = round(hyper_parameter / q) + uv = (multi + 1) * q + lv = (multi - 1) * q + elif search_space[key]["_type"] == "normal": + sigma = search_space[key]["_value"][1] + perturb = sigma * factor + uv = ub = hyper_parameter + perturb + lv = lb = hyper_parameter - perturb + elif search_space[key]["_type"] == "qnormal": + q = search_space[key]["_value"][2] + uv = ub = hyper_parameter + q + lv = lb = hyper_parameter - q + elif search_space[key]["_type"] == "lognormal": + sigma = search_space[key]["_value"][1] + perturb = sigma * factor + uv = ub = np.exp(np.log(hyper_parameter) + perturb) + lv = lb = np.exp(np.log(hyper_parameter) - perturb) + elif search_space[key]["_type"] == "qlognormal": + q = search_space[key]["_value"][2] + uv = ub = hyper_parameter + q + lv, lb = hyper_parameter - q, 1E-10 else: + logger.warning("Illegal type to perturb: %s", search_space[key]["_type"]) continue + if search_space[key]["_type"] == "choice": + idx = 
perturbation(search_space[key]["_type"], search_space[key]["_value"], + resample_probability, uv, ub, lv, lb, random_state) + hyper_parameters[key] = {'_index': idx, '_value': choices[idx]} + else: + hyper_parameters[key] = perturbation(search_space[key]["_type"], search_space[key]["_value"], + resample_probability, uv, ub, lv, lb, random_state) bot_trial_info.hyper_parameters = hyper_parameters bot_trial_info.clean_id() @@ -70,7 +160,8 @@ def clean_id(self): class PBTTuner(Tuner): - def __init__(self, optimize_mode="maximize", all_checkpoint_dir=None, population_size=10, factors=(1.2, 0.8), fraction=0.2): + def __init__(self, optimize_mode="maximize", all_checkpoint_dir=None, population_size=10, factor=0.2, + resample_probability=0.25, fraction=0.2): """ Initialization @@ -82,8 +173,10 @@ def __init__(self, optimize_mode="maximize", all_checkpoint_dir=None, population directory to store training model checkpoint population_size : int number of trials for each epoch - factors : tuple - factors for perturbation + factor : float + factor for perturbation + resample_probability : float + probability for resampling fraction : float fraction for selecting bottom and top trials """ @@ -93,7 +186,8 @@ def __init__(self, optimize_mode="maximize", all_checkpoint_dir=None, population logger.info("Checkpoint dir is set to %s by default.", all_checkpoint_dir) self.all_checkpoint_dir = all_checkpoint_dir self.population_size = population_size - self.factors = factors + self.factor = factor + self.resample_probability = resample_probability self.fraction = fraction # defined in trial code #self.perturbation_interval = perturbation_interval @@ -237,7 +331,7 @@ def receive_trial_result(self, parameter_id, parameters, value, **kwargs): bottoms = self.finished[self.finished_trials - cutoff:] for bottom in bottoms: top = np.random.choice(tops) - exploit_and_explore(bottom, top, self.factors, self.epoch, self.searchspace_json) + exploit_and_explore(bottom, top, self.factor, 
self.resample_probability, self.epoch, self.searchspace_json) for trial in self.finished: if trial not in bottoms: trial.clean_id()