diff --git a/examples/notebooks/Retiarii_example_multi-trial_NAS.ipynb b/examples/notebooks/Retiarii_example_multi-trial_NAS.ipynb new file mode 100644 index 0000000000..2143b842b4 --- /dev/null +++ b/examples/notebooks/Retiarii_example_multi-trial_NAS.ipynb @@ -0,0 +1,952 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Retiarii Example - Multi-trial NAS" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This example will show Retiarii's ability to **express** and **explore** the model space for Neural Architecture Search and Hyper-Parameter Tuning in a simple way. The video demo is in [YouTube](https://youtu.be/eQUlABCO0o8) and [Bilibili](https://www.bilibili.com/video/BV14h411v7kZ/).\n", + "\n", + "Let's start the journey with Retiarii!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 1: Express the Model Space" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Model space is defined by users to express a set of models that they want to explore, which contains potentially good-performing models. In Retiarii framework, a model space is defined with two parts: a base model and possible mutations on the base model." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 1.1: Define the Base Model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Defining a base model is almost the same as defining a PyTorch (or TensorFlow) model. Usually, you only need to replace the code ``import torch.nn as nn`` with ``import nni.retiarii.nn.pytorch as nn`` to use NNI wrapped PyTorch modules. Below is a very simple example of defining a base model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torch.nn.functional as F\n", + "import nni.retiarii.nn.pytorch as nn\n", + "\n", + "class Net(nn.Module):\n", + " def __init__(self):\n", + " super(Net, self).__init__()\n", + " self.conv1 = nn.Conv2d(3, 6, 3, padding=1)\n", + " self.pool = nn.MaxPool2d(2, 2)\n", + " self.conv2 = nn.Conv2d(6, 16, 3, padding=1)\n", + " self.conv3 = nn.Conv2d(16, 16, 1)\n", + "\n", + " self.bn = nn.BatchNorm2d(16)\n", + "\n", + " self.gap = nn.AdaptiveAvgPool2d(4)\n", + " self.fc1 = nn.Linear(16 * 4 * 4, 120)\n", + " self.fc2 = nn.Linear(120, 84)\n", + " self.fc3 = nn.Linear(84, 10)\n", + "\n", + " def forward(self, x):\n", + " bs = x.size(0)\n", + "\n", + " x = self.pool(F.relu(self.conv1(x)))\n", + " x0 = F.relu(self.conv2(x))\n", + " x1 = F.relu(self.conv3(x0))\n", + "\n", + " x1 += x0\n", + " x = self.pool(self.bn(x1))\n", + "\n", + " x = self.gap(x).view(bs, -1)\n", + " x = F.relu(self.fc1(x))\n", + " x = F.relu(self.fc2(x))\n", + " x = self.fc3(x)\n", + " return x\n", + " \n", + "model = Net()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 1.2: Define the Model Mutations" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A base model is only one concrete model, not a model space. NNI provides APIs and primitives for users to express how the base model can be mutated, i.e., a model space that includes many models. The following will use inline Mutation APIs ``LayerChoice`` to choose a layer from candidate operations and use ``InputChoice`` to try out skip connection." 
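For clarity, here is a minimal sketch of just the two mutation primitives that the next cell applies to the base model; it mirrors the lines that change in that cell and adds nothing beyond them.

```python
# Minimal sketch of the two inline mutation APIs used in the next cell.
import nni.retiarii.nn.pytorch as nn

# LayerChoice: pick one layer from a list of candidate operations.
conv1 = nn.LayerChoice([nn.Conv2d(3, 6, 3, padding=1), nn.Conv2d(3, 6, 5, padding=2)])

# InputChoice: pick among candidate input tensors, here used to decide whether
# a skip connection is added; in forward(): x1 = skipconnect([x1, x1 + x0])
skipconnect = nn.InputChoice(n_candidates=2)
```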
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import torch.nn.functional as F\n", + "import nni.retiarii.nn.pytorch as nn\n", + "\n", + "class Net(nn.Module):\n", + " def __init__(self):\n", + " super(Net, self).__init__()\n", + " # self.conv1 = nn.Conv2d(3, 6, 3, padding=1)\n", + " self.conv1 = nn.LayerChoice([nn.Conv2d(3, 6, 3, padding=1), nn.Conv2d(3, 6, 5, padding=2)])\n", + " self.pool = nn.MaxPool2d(2, 2)\n", + " # self.conv2 = nn.Conv2d(6, 16, 3, padding=1)\n", + " self.conv2 = nn.LayerChoice([nn.Conv2d(6, 16, 3, padding=1), nn.Conv2d(6, 16, 5, padding=2)])\n", + " self.conv3 = nn.Conv2d(16, 16, 1)\n", + "\n", + " self.skipconnect = nn.InputChoice(n_candidates=2)\n", + " self.bn = nn.BatchNorm2d(16)\n", + "\n", + " self.gap = nn.AdaptiveAvgPool2d(4)\n", + " self.fc1 = nn.Linear(16 * 4 * 4, 120)\n", + " self.fc2 = nn.Linear(120, 84)\n", + " self.fc3 = nn.Linear(84, 10)\n", + "\n", + " def forward(self, x):\n", + " bs = x.size(0)\n", + "\n", + " x = self.pool(F.relu(self.conv1(x)))\n", + " x0 = F.relu(self.conv2(x))\n", + " x1 = F.relu(self.conv3(x0))\n", + "\n", + " x1 = self.skipconnect([x1, x1+x0])\n", + " x = self.pool(self.bn(x1))\n", + "\n", + " x = self.gap(x).view(bs, -1)\n", + " x = F.relu(self.fc1(x))\n", + " x = F.relu(self.fc2(x))\n", + " x = self.fc3(x)\n", + " return x\n", + " \n", + "model = Net()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 2: Explore the Model Space" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will demo the **multi-trial** NAS method first. In the multi-trial NAS process, the search strategy repeatedly generates new models, and the model evaluator trains and validates each generated model. The obtained performance of a generated model is collected and sent to the search strategy for generating better models. \n", + "\n", + "Users can choose a proper search strategy to explore the model space, and use a chosen or user-defined model evaluator to evaluate the performance of each sampled model." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 2.1: Choose or Write a Search Strategy\n", + "\n", + "Currently, Retiarii supports many common strategies, such as Random, Regularized Evolution, and TPE. You can also customize a new strategy easily with Retiarii's APIs; here we use the TPE strategy as an example." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import nni.retiarii.strategy as strategy\n", + "\n", + "simple_strategy = strategy.TPEStrategy() # choice: Random, GridSearch, RegularizedEvolution, TPEStrategy" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 2.2: Choose or Write a Model Evaluator" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The model evaluator should correctly identify the use case of the model and the optimization goal. For example, on a classification task, a dataset is needed, the loss function could be cross entropy, and the optimized metric could be accuracy.\n", + "\n", + "Retiarii provides two ways for users to write a new model evaluator. In the context of PyTorch, Retiarii offers two built-in model evaluators designed for simple use cases: classification and regression. These two evaluators are built upon the awesome PyTorch-Lightning library. 
Here we take a classification task on the CIFAR10 dataset as an example." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Files already downloaded and verified\n", + "Files already downloaded and verified\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True, used: True\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-07 11:15:27] INFO (lightning/MainThread) GPU available: True, used: True\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-07 11:15:27] INFO (lightning/MainThread) TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-07 11:15:27] INFO (lightning/MainThread) LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]\n" + ] + } + ], + "source": [ + "from torchvision import transforms\n", + "from torchvision.datasets import CIFAR10\n", + "from nni.retiarii import serialize\n", + "import nni.retiarii.evaluator.pytorch.lightning as pl\n", + "\n", + "transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])\n", + "train_dataset = serialize(CIFAR10, root=\"./data\", train=True, download=True, transform=transform)\n", + "test_dataset = serialize(CIFAR10, root=\"./data\", train=False, download=True, transform=transform)\n", + "\n", + "trainer = pl.Classification(train_dataloader=pl.DataLoader(train_dataset, batch_size=64),\n", + " val_dataloaders=pl.DataLoader(test_dataset, batch_size=64),\n", + " max_epochs=2, gpus=[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 2.3: Configure the Experiment" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After all the above are prepared, it is time to configure an experiment to do the model search. The basic experiment configuration is as follows; for advanced configuration, refer to [this page](https://nni.readthedocs.io/en/stable/reference/experiment_config.html).\n", + "\n", + "NNI allows users to run experiments on different training platforms to speed up the search, such as Local Machine, Remote Servers, OpenPAI, Kubeflow, FrameworkController on K8S, DLWorkspace, Azure Machine Learning, AdaptDL, other cloud options, and even Hybrid mode. Here we use local mode with GPU acceleration. 
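As a complement to the local-mode configuration in the next cell, here is a hedged sketch of how the same experiment might target a remote machine instead. The import path `nni.experiment.RemoteMachineConfig` and its fields (`host`, `user`, `password`) are assumptions based on the experiment configuration reference linked above; verify them against that page before use.

```python
# Hedged sketch (not part of the original notebook): remote-mode configuration.
# RemoteMachineConfig and its field names are assumptions -- check the NNI
# experiment configuration reference for the authoritative schema.
from nni.experiment import RemoteMachineConfig
from nni.retiarii.experiment.pytorch import RetiariiExeConfig

remote_config = RetiariiExeConfig('remote')
remote_config.experiment_name = 'Retiarii example (remote)'
remote_config.trial_concurrency = 2
remote_config.max_trial_number = 10
remote_config.training_service.machine_list = [
    # Placeholder host and credentials; replace with your own machine.
    RemoteMachineConfig(host='192.0.2.10', user='your_user', password='your_password')
]
```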
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from nni.retiarii.experiment.pytorch import RetiariiExeConfig, RetiariiExperiment\n", + "\n", + "exp = RetiariiExperiment(model, trainer, [], simple_strategy)\n", + "\n", + "exp_config = RetiariiExeConfig('local')\n", + "exp_config.experiment_name = 'Retiarii example'\n", + "exp_config.trial_concurrency = 2\n", + "exp_config.max_trial_number = 10\n", + "exp_config.trial_gpu_number = 2\n", + "exp_config.max_experiment_duration = '5m'\n", + "exp_config.training_service.use_active_gpu = True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 2.4: Run and View the Experiment" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can launch the experiment now! " + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-07 11:15:34] INFO (nni.experiment/MainThread) Creating experiment, Experiment ID: d9cseb3g\n", + "[2021-06-07 11:15:34] INFO (nni.experiment/MainThread) Connecting IPC pipe...\n", + "[2021-06-07 11:15:34] INFO (nni.experiment/MainThread) Statring web server...\n", + "[2021-06-07 11:15:35] INFO (nni.experiment/MainThread) Setting up...\n", + "[2021-06-07 11:15:36] INFO (nni.runtime.msg_dispatcher_base/Thread-6) Dispatcher started\n", + "[2021-06-07 11:15:36] INFO (nni.retiarii.experiment.pytorch/MainThread) Web UI URLs: http://127.0.0.1:8745\n", + "[2021-06-07 11:15:36] INFO (nni.retiarii.experiment.pytorch/MainThread) Start strategy...\n", + "[2021-06-07 11:15:36] INFO (nni.retiarii.strategy.tpe_strategy/MainThread) TPE strategy has been started.\n", + "[2021-06-07 11:15:36] INFO (hyperopt.tpe/MainThread) tpe_transform took 0.001164 seconds\n", + "[2021-06-07 11:15:36] INFO (hyperopt.tpe/MainThread) TPE using 0 trials\n", + "[2021-06-07 11:15:36] INFO (hyperopt.tpe/MainThread) tpe_transform took 0.001256 seconds\n", + "[2021-06-07 11:15:36] INFO (hyperopt.tpe/MainThread) TPE using 0 trials\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-07 11:16:31] INFO (lightning/Thread-5) GPU available: True, used: True\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-07 11:16:31] INFO (lightning/Thread-5) TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-07 11:16:31] INFO (lightning/Thread-5) LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]\n", + "Files already downloaded and verified\n", + "Files already downloaded and verified\n", + "[2021-06-07 11:16:33] INFO (hyperopt.tpe/MainThread) tpe_transform took 0.002677 seconds\n", + "[2021-06-07 11:16:33] INFO (hyperopt.tpe/MainThread) TPE using 1/1 trials with best loss 0.626600\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True, used: True\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-07 11:16:36] INFO (lightning/Thread-5) GPU available: True, used: True\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "TPU available: None, using: 0 TPU cores\n" + ] + }, + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-07 11:16:36] INFO (lightning/Thread-5) TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-07 11:16:36] INFO (lightning/Thread-5) LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]\n", + "Files already downloaded and verified\n", + "[2021-06-07 11:16:37] INFO (hyperopt.tpe/MainThread) tpe_transform took 0.002730 seconds\n", + "[2021-06-07 11:16:37] INFO (hyperopt.tpe/MainThread) TPE using 1/1 trials with best loss 0.626600\n", + "Files already downloaded and verified\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True, used: True\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-07 11:17:26] INFO (lightning/Thread-5) GPU available: True, used: True\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-07 11:17:26] INFO (lightning/Thread-5) TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-07 11:17:26] INFO (lightning/Thread-5) LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]\n", + "Files already downloaded and verified\n", + "[2021-06-07 11:17:27] INFO (hyperopt.tpe/MainThread) tpe_transform took 0.003051 seconds\n", + "[2021-06-07 11:17:27] INFO (hyperopt.tpe/MainThread) TPE using 2/2 trials with best loss 0.594700\n", + "Files already downloaded and verified\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True, used: True\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-07 11:17:31] INFO (lightning/Thread-5) GPU available: True, used: True\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-07 11:17:31] INFO (lightning/Thread-5) TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-07 11:17:31] INFO (lightning/Thread-5) LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]\n", + "Files already downloaded and verified\n", + "[2021-06-07 11:17:31] INFO (hyperopt.tpe/MainThread) tpe_transform took 0.002537 seconds\n", + "[2021-06-07 11:17:31] INFO (hyperopt.tpe/MainThread) TPE using 3/3 trials with best loss 0.594700\n", + "Files already downloaded and verified\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True, used: True\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-07 11:18:21] INFO (lightning/Thread-5) GPU available: True, used: True\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-07 11:18:21] INFO (lightning/Thread-5) TPU available: 
None, using: 0 TPU cores\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-07 11:18:21] INFO (lightning/Thread-5) LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]\n", + "Files already downloaded and verified\n", + "[2021-06-07 11:18:22] INFO (hyperopt.tpe/MainThread) tpe_transform took 0.002532 seconds\n", + "[2021-06-07 11:18:22] INFO (hyperopt.tpe/MainThread) TPE using 4/4 trials with best loss 0.594700\n", + "Files already downloaded and verified\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True, used: True\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-07 11:18:26] INFO (lightning/Thread-5) GPU available: True, used: True\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-07 11:18:26] INFO (lightning/Thread-5) TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-07 11:18:26] INFO (lightning/Thread-5) LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]\n", + "Files already downloaded and verified\n", + "Files already downloaded and verified\n", + "[2021-06-07 11:18:28] INFO (hyperopt.tpe/MainThread) tpe_transform took 0.002615 seconds\n", + "[2021-06-07 11:18:28] INFO (hyperopt.tpe/MainThread) TPE using 6/6 trials with best loss 0.594700\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True, used: True\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-07 11:19:16] INFO (lightning/Thread-5) GPU available: True, used: True\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-07 11:19:16] INFO (lightning/Thread-5) TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-07 11:19:16] INFO (lightning/Thread-5) LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]\n", + "Files already downloaded and verified\n", + "Files already downloaded and verified\n", + "[2021-06-07 11:19:18] INFO (hyperopt.tpe/MainThread) tpe_transform took 0.002395 seconds\n", + "[2021-06-07 11:19:18] INFO (hyperopt.tpe/MainThread) TPE using 7/7 trials with best loss 0.594700\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True, used: True\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-07 11:19:21] INFO (lightning/Thread-5) GPU available: True, used: True\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-07 11:19:21] INFO (lightning/Thread-5) TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "LOCAL_RANK: 0 - 
CUDA_VISIBLE_DEVICES: [0,1,2,3]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-07 11:19:21] INFO (lightning/Thread-5) LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]\n", + "Files already downloaded and verified\n", + "[2021-06-07 11:19:23] INFO (hyperopt.tpe/MainThread) tpe_transform took 0.002959 seconds\n", + "[2021-06-07 11:19:23] INFO (hyperopt.tpe/MainThread) TPE using 7/7 trials with best loss 0.594700\n", + "Files already downloaded and verified\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True, used: True\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-07 11:20:12] INFO (lightning/Thread-5) GPU available: True, used: True\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-07 11:20:12] INFO (lightning/Thread-5) TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-07 11:20:12] INFO (lightning/Thread-5) LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]\n", + "Files already downloaded and verified\n", + "[2021-06-07 11:20:13] INFO (hyperopt.tpe/MainThread) tpe_transform took 0.003336 seconds\n", + "[2021-06-07 11:20:13] INFO (hyperopt.tpe/MainThread) TPE using 8/8 trials with best loss 0.594700\n", + "Files already downloaded and verified\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True, used: True\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-07 11:20:22] INFO (lightning/Thread-5) GPU available: True, used: True\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-07 11:20:22] INFO (lightning/Thread-5) TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-07 11:20:22] INFO (lightning/Thread-5) LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]\n", + "[2021-06-07 11:20:22] INFO (hyperopt.tpe/MainThread) tpe_transform took 0.002093 seconds\n", + "[2021-06-07 11:20:22] INFO (hyperopt.tpe/MainThread) TPE using 9/9 trials with best loss 0.593200\n", + "Files already downloaded and verified\n", + "Files already downloaded and verified\n", + "[2021-06-07 11:20:26] INFO (nni.retiarii.experiment.pytorch/Thread-7) Stopping experiment, please wait...\n", + "[2021-06-07 11:20:26] INFO (nni.runtime.msg_dispatcher_base/Thread-6) Dispatcher exiting...\n", + "[2021-06-07 11:20:26] INFO (nni.retiarii.experiment.pytorch/MainThread) Strategy exit\n", + "[2021-06-07 11:20:26] INFO (nni.retiarii.experiment.pytorch/MainThread) Waiting for experiment to become DONE (you can ctrl+c if there is no running trial jobs)...\n", + "[2021-06-07 11:20:27] INFO (nni.retiarii.experiment.pytorch/Thread-7) Experiment stopped\n", + "[2021-06-07 11:20:29] INFO (nni.runtime.msg_dispatcher_base/Thread-6) Dispatcher terminiated\n" + ] + } + ], + "source": [ + "exp.run(exp_config, 8745)" + ] + }, + { + "cell_type": "markdown", 
+ "metadata": {}, + "source": [ + "Besides, NNI provides WebUI to help users view the experiment results and make more advanced analysis." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 2.5: Export the top Model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After searching, exporting the top model script is also very convenient." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Final model:\n", + "import torch\n", + "import torch.nn as nn\n", + "import torch.nn.functional as F\n", + "import torch.optim as optim\n", + "\n", + "import nni.retiarii.nn.pytorch\n", + "\n", + "import nni\n", + "import torch\n", + "\n", + "\n", + "class _model__conv1(nn.Module):\n", + " def __init__(self):\n", + " super().__init__()\n", + " self.layerchoice__mutation_1_0 = torch.nn.modules.conv.Conv2d(padding=1, in_channels=3, out_channels=6, kernel_size=3)\n", + "\n", + " def forward(self, *_inputs):\n", + " layerchoice__mutation_1_0 = self.layerchoice__mutation_1_0(_inputs[0])\n", + " return layerchoice__mutation_1_0\n", + "\n", + "\n", + "\n", + "class _model__conv2(nn.Module):\n", + " def __init__(self):\n", + " super().__init__()\n", + " self.layerchoice__mutation_2_1 = torch.nn.modules.conv.Conv2d(padding=2, in_channels=6, out_channels=16, kernel_size=5)\n", + "\n", + " def forward(self, *_inputs):\n", + " layerchoice__mutation_2_1 = self.layerchoice__mutation_2_1(_inputs[0])\n", + " return layerchoice__mutation_2_1\n", + "\n", + "\n", + "\n", + "class _model(nn.Module):\n", + " def __init__(self):\n", + " super().__init__()\n", + " self.__conv1 = _model__conv1()\n", + " self.__pool = torch.nn.modules.pooling.MaxPool2d(kernel_size=2, stride=2)\n", + " self.__conv2 = _model__conv2()\n", + " self.__conv3 = torch.nn.modules.conv.Conv2d(in_channels=16, out_channels=16, kernel_size=1)\n", + " self.__skipconnect = nni.retiarii.nn.pytorch.ChosenInputs(chosen=[1], reduction='sum')\n", + " self.__bn = torch.nn.modules.batchnorm.BatchNorm2d(num_features=16)\n", + " self.__gap = torch.nn.modules.pooling.AdaptiveAvgPool2d(output_size=4)\n", + " self.__fc1 = torch.nn.modules.linear.Linear(in_features=256, out_features=120)\n", + " self.__fc2 = torch.nn.modules.linear.Linear(in_features=120, out_features=84)\n", + " self.__fc3 = torch.nn.modules.linear.Linear(in_features=84, out_features=10)\n", + "\n", + " def forward(self, x__1):\n", + " __Constant1 = -1\n", + " __Constant2 = 1\n", + " __Constant4 = False\n", + " __Constant5 = 0\n", + " __conv1 = self.__conv1(x__1)\n", + " __aten__size6 = x__1.size(dim=__Constant5)\n", + " __relu9 = F.relu(__conv1, __Constant4)\n", + " __ListConstruct21 = [__aten__size6, __Constant1]\n", + " __pool = self.__pool(__relu9)\n", + " __conv2 = self.__conv2(__pool)\n", + " __relu11 = F.relu(__conv2, __Constant4)\n", + " __conv3 = self.__conv3(__relu11)\n", + " __relu13 = F.relu(__conv3, __Constant4)\n", + " __aten__add15 = __relu13.add(other=__relu11, alpha=__Constant2)\n", + " __ListConstruct16 = [__relu13, __aten__add15]\n", + " __skipconnect = self.__skipconnect(__ListConstruct16)\n", + " __bn = self.__bn(__skipconnect)\n", + " __pool__19 = self.__pool(__bn)\n", + " __gap = self.__gap(__pool__19)\n", + " __aten__view22 = __gap.view(size=__ListConstruct21)\n", + " __fc1 = self.__fc1(__aten__view22)\n", + " __relu24 = F.relu(__fc1, __Constant4)\n", + " __fc2 = self.__fc2(__relu24)\n", + " __relu26 = F.relu(__fc2, 
__Constant4)\n", + " __fc3 = self.__fc3(__relu26)\n", + " return __fc3\n" + ] + } + ], + "source": [ + "print('Final model:')\n", + "for model_code in exp.export_top_models():\n", + " print(model_code)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/notebooks/Retiarii_example_one-shot_NAS.ipynb b/examples/notebooks/Retiarii_example_one-shot_NAS.ipynb new file mode 100644 index 0000000000..11497c9420 --- /dev/null +++ b/examples/notebooks/Retiarii_example_one-shot_NAS.ipynb @@ -0,0 +1,354 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Retiarii Example - One-shot NAS" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This example will show Retiarii's ability to **express** and **explore** the model space for Neural Architecture Search and Hyper-Parameter Tuning in a simple way. The video demo is in [YouTube](https://youtu.be/3nEx9GMHYEk) and [Bilibili](https://www.bilibili.com/video/BV1c54y1V7vx/).\n", + "\n", + "Let's start the journey with Retiarii!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 1: Express the Model Space" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Model space is defined by users to express a set of models that they want to explore, which contains potentially good-performing models. In Retiarii framework, a model space is defined with two parts: a base model and possible mutations on the base model." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 1.1: Define the Base Model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Defining a base model is almost the same as defining a PyTorch (or TensorFlow) model. Usually, you only need to replace the code ``import torch.nn as nn`` with ``import nni.retiarii.nn.pytorch as nn`` to use NNI wrapped PyTorch modules. Below is a very simple example of defining a base model." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torch.nn.functional as F\n", + "import nni.retiarii.nn.pytorch as nn\n", + "\n", + "class Net(nn.Module):\n", + " def __init__(self):\n", + " super(Net, self).__init__()\n", + " self.conv1 = nn.Conv2d(3, 6, 3, padding=1)\n", + " self.pool = nn.MaxPool2d(2, 2)\n", + " self.conv2 = nn.Conv2d(6, 16, 3, padding=1)\n", + " self.conv3 = nn.Conv2d(16, 16, 1)\n", + "\n", + " self.bn = nn.BatchNorm2d(16)\n", + "\n", + " self.gap = nn.AdaptiveAvgPool2d(4)\n", + " self.fc1 = nn.Linear(16 * 4 * 4, 120)\n", + " self.fc2 = nn.Linear(120, 84)\n", + " self.fc3 = nn.Linear(84, 10)\n", + "\n", + " def forward(self, x):\n", + " bs = x.size(0)\n", + "\n", + " x = self.pool(F.relu(self.conv1(x)))\n", + " x0 = F.relu(self.conv2(x))\n", + " x1 = F.relu(self.conv3(x0))\n", + "\n", + " x1 += x0\n", + " x = self.pool(self.bn(x1))\n", + "\n", + " x = self.gap(x).view(bs, -1)\n", + " x = F.relu(self.fc1(x))\n", + " x = F.relu(self.fc2(x))\n", + " x = self.fc3(x)\n", + " return x\n", + " \n", + "model = Net()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 1.2: Define the Model Mutations" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A base model is only one concrete model, not a model space. NNI provides APIs and primitives for users to express how the base model can be mutated, i.e., a model space that includes many models. The following will use inline Mutation APIs ``LayerChoice`` to choose a layer from candidate operations and use ``InputChoice`` to try out skip connection." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import torch.nn.functional as F\n", + "import nni.retiarii.nn.pytorch as nn\n", + "\n", + "class Net(nn.Module):\n", + " def __init__(self):\n", + " super(Net, self).__init__()\n", + " # self.conv1 = nn.Conv2d(3, 6, 3, padding=1)\n", + " self.conv1 = nn.LayerChoice([nn.Conv2d(3, 6, 3, padding=1), nn.Conv2d(3, 6, 5, padding=2)])\n", + " self.pool = nn.MaxPool2d(2, 2)\n", + " # self.conv2 = nn.Conv2d(6, 16, 3, padding=1)\n", + " self.conv2 = nn.LayerChoice([nn.Conv2d(6, 16, 3, padding=1), nn.Conv2d(6, 16, 5, padding=2)])\n", + " self.conv3 = nn.Conv2d(16, 16, 1)\n", + "\n", + " self.skipconnect = nn.InputChoice(n_candidates=2)\n", + " self.bn = nn.BatchNorm2d(16)\n", + "\n", + " self.gap = nn.AdaptiveAvgPool2d(4)\n", + " self.fc1 = nn.Linear(16 * 4 * 4, 120)\n", + " self.fc2 = nn.Linear(120, 84)\n", + " self.fc3 = nn.Linear(84, 10)\n", + "\n", + " def forward(self, x):\n", + " bs = x.size(0)\n", + "\n", + " x = self.pool(F.relu(self.conv1(x)))\n", + " x0 = F.relu(self.conv2(x))\n", + " x1 = F.relu(self.conv3(x0))\n", + "\n", + " x1 = self.skipconnect([x1, x1+x0])\n", + " x = self.pool(self.bn(x1))\n", + "\n", + " x = self.gap(x).view(bs, -1)\n", + " x = F.relu(self.fc1(x))\n", + " x = F.relu(self.fc2(x))\n", + " x = self.fc3(x)\n", + " return x\n", + " \n", + "model = Net()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 2: Explore the Model Space" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "With a defined model space, users can explore the space in two ways. One is the multi-trial NAS method, which searchs by evaluating each sampled model independently. The other is using one-shot weight-sharing based search, which consumes much less computational resource compared to the first one. 
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this part, we focus on this **one-shot** approach. The principle of the One-shot approach is combining all the models in a model space into one big model (usually called super-model or super-graph). It takes charge of both search, training and testing, by training and evaluating this big model.\n", + "\n", + "Retiarii has supported some classic one-shot trainers, like DARTS trainer, ENAS trainer, ProxylessNAS trainer, Single-path trainer, and users can customize a new one-shot trainer according to the APIs provided by Retiarii conveniently.\n", + "\n", + "Here, we show an example to use DARTS trainer manually." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Files already downloaded and verified\n", + "[2021-06-07 11:12:22] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [1/391] acc1 0.093750 (0.093750) loss 2.286068 (2.286068)\n", + "[2021-06-07 11:12:22] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [11/391] acc1 0.093750 (0.089489) loss 2.328799 (2.309416)\n", + "[2021-06-07 11:12:23] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [21/391] acc1 0.093750 (0.092262) loss 2.302527 (2.309082)\n", + "[2021-06-07 11:12:23] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [31/391] acc1 0.109375 (0.099294) loss 2.294730 (2.304962)\n", + "[2021-06-07 11:12:23] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [41/391] acc1 0.203125 (0.103277) loss 2.284227 (2.302716)\n", + "[2021-06-07 11:12:23] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [51/391] acc1 0.078125 (0.106618) loss 2.308704 (2.300639)\n", + "[2021-06-07 11:12:23] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [61/391] acc1 0.203125 (0.110143) loss 2.258595 (2.298042)\n", + "[2021-06-07 11:12:23] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [71/391] acc1 0.078125 (0.112896) loss 2.276706 (2.294709)\n", + "[2021-06-07 11:12:24] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [81/391] acc1 0.078125 (0.116898) loss 2.309119 (2.292235)\n", + "[2021-06-07 11:12:24] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [91/391] acc1 0.093750 (0.118304) loss 2.263757 (2.289659)\n", + "[2021-06-07 11:12:24] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [101/391] acc1 0.109375 (0.119431) loss 2.260739 (2.287132)\n", + "[2021-06-07 11:12:24] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [111/391] acc1 0.109375 (0.121481) loss 2.279930 (2.284314)\n", + "[2021-06-07 11:12:24] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [121/391] acc1 0.046875 (0.122934) loss 2.270205 (2.281701)\n", + "[2021-06-07 11:12:25] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [131/391] acc1 0.156250 (0.125477) loss 2.270163 (2.278612)\n", + "[2021-06-07 11:12:25] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [141/391] acc1 0.171875 (0.126551) loss 2.233467 (2.276326)\n", + "[2021-06-07 11:12:25] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [151/391] acc1 0.109375 (0.127897) loss 2.264694 (2.274296)\n", + "[2021-06-07 11:12:25] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [161/391] acc1 0.250000 
(0.132279) loss 2.259590 (2.271723)\n", + "[2021-06-07 11:12:25] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [171/391] acc1 0.093750 (0.134868) loss 2.240986 (2.269037)\n", + "[2021-06-07 11:12:25] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [181/391] acc1 0.218750 (0.137690) loss 2.218153 (2.266567)\n", + "[2021-06-07 11:12:25] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [191/391] acc1 0.078125 (0.140134) loss 2.260816 (2.264373)\n", + "[2021-06-07 11:12:26] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [201/391] acc1 0.156250 (0.144123) loss 2.191213 (2.261285)\n", + "[2021-06-07 11:12:26] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [211/391] acc1 0.125000 (0.146919) loss 2.245425 (2.258747)\n", + "[2021-06-07 11:12:26] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [221/391] acc1 0.218750 (0.150028) loss 2.216708 (2.255553)\n", + "[2021-06-07 11:12:26] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [231/391] acc1 0.250000 (0.153003) loss 2.195549 (2.252894)\n", + "[2021-06-07 11:12:26] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [241/391] acc1 0.234375 (0.155666) loss 2.169693 (2.249465)\n", + "[2021-06-07 11:12:26] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [251/391] acc1 0.218750 (0.158989) loss 2.174878 (2.246355)\n", + "[2021-06-07 11:12:27] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [261/391] acc1 0.312500 (0.162775) loss 2.117693 (2.243113)\n", + "[2021-06-07 11:12:27] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [271/391] acc1 0.265625 (0.166686) loss 2.136203 (2.239288)\n", + "[2021-06-07 11:12:27] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [281/391] acc1 0.234375 (0.169095) loss 2.213463 (2.236377)\n", + "[2021-06-07 11:12:27] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [291/391] acc1 0.218750 (0.171338) loss 2.114096 (2.232892)\n", + "[2021-06-07 11:12:27] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [301/391] acc1 0.203125 (0.173432) loss 2.134074 (2.229637)\n", + "[2021-06-07 11:12:28] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [311/391] acc1 0.265625 (0.175291) loss 2.041354 (2.225920)\n", + "[2021-06-07 11:12:28] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [321/391] acc1 0.250000 (0.176840) loss 2.081122 (2.222280)\n", + "[2021-06-07 11:12:28] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [331/391] acc1 0.140625 (0.178578) loss 2.124206 (2.219168)\n", + "[2021-06-07 11:12:28] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [341/391] acc1 0.250000 (0.180169) loss 2.077291 (2.215540)\n", + "[2021-06-07 11:12:28] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [351/391] acc1 0.250000 (0.182381) loss 2.077531 (2.211650)\n", + "[2021-06-07 11:12:28] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [361/391] acc1 0.312500 (0.185033) loss 2.016619 (2.207455)\n", + "[2021-06-07 11:12:29] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [371/391] acc1 0.250000 (0.187163) loss 2.139604 (2.202785)\n", + "[2021-06-07 11:12:29] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [381/391] acc1 0.281250 (0.189099) loss 2.033739 (2.198564)\n", + "[2021-06-07 
11:12:29] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [1/2] Step [391/391] acc1 0.275000 (0.190441) loss 1.988353 (2.194509)\n", + "[2021-06-07 11:12:29] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [1/391] acc1 0.296875 (0.296875) loss 2.083627 (2.083627)\n", + "[2021-06-07 11:12:30] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [11/391] acc1 0.265625 (0.251420) loss 2.042856 (2.050898)\n", + "[2021-06-07 11:12:30] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [21/391] acc1 0.234375 (0.273065) loss 2.005307 (2.021047)\n", + "[2021-06-07 11:12:30] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [31/391] acc1 0.375000 (0.269657) loss 1.934093 (2.014375)\n", + "[2021-06-07 11:12:30] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [41/391] acc1 0.265625 (0.277439) loss 2.007705 (2.003260)\n", + "[2021-06-07 11:12:30] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [51/391] acc1 0.218750 (0.278799) loss 2.014602 (2.001039)\n", + "[2021-06-07 11:12:31] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [61/391] acc1 0.187500 (0.278945) loss 2.088407 (1.995837)\n", + "[2021-06-07 11:12:31] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [71/391] acc1 0.343750 (0.285651) loss 1.894479 (1.988130)\n", + "[2021-06-07 11:12:31] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [81/391] acc1 0.281250 (0.289159) loss 1.869002 (1.979012)\n", + "[2021-06-07 11:12:31] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [91/391] acc1 0.265625 (0.291552) loss 1.848354 (1.971483)\n", + "[2021-06-07 11:12:31] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [101/391] acc1 0.406250 (0.290996) loss 1.840711 (1.964297)\n", + "[2021-06-07 11:12:31] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [111/391] acc1 0.390625 (0.294764) loss 1.905811 (1.958954)\n", + "[2021-06-07 11:12:32] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [121/391] acc1 0.250000 (0.296617) loss 1.935214 (1.952315)\n", + "[2021-06-07 11:12:32] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [131/391] acc1 0.281250 (0.299618) loss 1.901846 (1.944634)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-07 11:12:32] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [141/391] acc1 0.312500 (0.302970) loss 1.854658 (1.939751)\n", + "[2021-06-07 11:12:32] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [151/391] acc1 0.218750 (0.305257) loss 1.927818 (1.934704)\n", + "[2021-06-07 11:12:32] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [161/391] acc1 0.343750 (0.307648) loss 1.820810 (1.927533)\n", + "[2021-06-07 11:12:33] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [171/391] acc1 0.312500 (0.307383) loss 1.800313 (1.924665)\n", + "[2021-06-07 11:12:33] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [181/391] acc1 0.484375 (0.307925) loss 1.637479 (1.920402)\n", + "[2021-06-07 11:12:33] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [191/391] acc1 0.359375 (0.306692) loss 1.732374 (1.917680)\n", + "[2021-06-07 11:12:33] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [201/391] acc1 0.406250 (0.309624) loss 1.870701 (1.911484)\n", + 
"[2021-06-07 11:12:33] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [211/391] acc1 0.328125 (0.311982) loss 1.785704 (1.905039)\n", + "[2021-06-07 11:12:33] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [221/391] acc1 0.265625 (0.312712) loss 1.738683 (1.901547)\n", + "[2021-06-07 11:12:33] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [231/391] acc1 0.359375 (0.315409) loss 1.827117 (1.894860)\n", + "[2021-06-07 11:12:34] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [241/391] acc1 0.375000 (0.317881) loss 1.717454 (1.888916)\n", + "[2021-06-07 11:12:34] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [251/391] acc1 0.328125 (0.318663) loss 1.873310 (1.886883)\n", + "[2021-06-07 11:12:34] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [261/391] acc1 0.390625 (0.320163) loss 1.657088 (1.881767)\n", + "[2021-06-07 11:12:34] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [271/391] acc1 0.421875 (0.321264) loss 1.710897 (1.877521)\n", + "[2021-06-07 11:12:34] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [281/391] acc1 0.421875 (0.321230) loss 1.760745 (1.875136)\n", + "[2021-06-07 11:12:34] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [291/391] acc1 0.375000 (0.321413) loss 1.669255 (1.872129)\n", + "[2021-06-07 11:12:34] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [301/391] acc1 0.328125 (0.322051) loss 1.728873 (1.868047)\n", + "[2021-06-07 11:12:35] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [311/391] acc1 0.375000 (0.323000) loss 1.754761 (1.864783)\n", + "[2021-06-07 11:12:35] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [321/391] acc1 0.437500 (0.324864) loss 1.666240 (1.859164)\n", + "[2021-06-07 11:12:35] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [331/391] acc1 0.421875 (0.325954) loss 1.661471 (1.856318)\n", + "[2021-06-07 11:12:35] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [341/391] acc1 0.328125 (0.326475) loss 1.737106 (1.853075)\n", + "[2021-06-07 11:12:35] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [351/391] acc1 0.343750 (0.327724) loss 1.789253 (1.849491)\n", + "[2021-06-07 11:12:36] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [361/391] acc1 0.250000 (0.328558) loss 1.773805 (1.846033)\n", + "[2021-06-07 11:12:36] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [371/391] acc1 0.312500 (0.329094) loss 1.901358 (1.844091)\n", + "[2021-06-07 11:12:36] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [381/391] acc1 0.250000 (0.330011) loss 1.863921 (1.841390)\n", + "[2021-06-07 11:12:36] INFO (nni.retiarii.oneshot.pytorch.darts/MainThread) Epoch [2/2] Step [391/391] acc1 0.325000 (0.331514) loss 1.729926 (1.837162)\n" + ] + } + ], + "source": [ + "import torch\n", + "from utils import accuracy\n", + "from torchvision import transforms\n", + "from torchvision.datasets import CIFAR10\n", + "from nni.retiarii.oneshot.pytorch import DartsTrainer\n", + "\n", + "criterion = torch.nn.CrossEntropyLoss()\n", + "optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)\n", + "\n", + "transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])\n", + "train_dataset = CIFAR10(root=\"./data\", 
train=True, download=True, transform=transform)\n", + "\n", + "trainer = DartsTrainer(\n", + " model=model,\n", + " loss=criterion,\n", + " metrics=lambda output, target: accuracy(output, target),\n", + " optimizer=optimizer,\n", + " num_epochs=2,\n", + " dataset=train_dataset,\n", + " batch_size=64,\n", + " log_frequency=10\n", + " )\n", + "\n", + "trainer.fit()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Similarly, the optimal structure found can be exported." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Final architecture: {'_mutation_1': 1, '_mutation_2': 1, '_mutation_3': [1]}\n" + ] + } + ], + "source": [ + "print('Final architecture:', trainer.export())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/notebooks/tabular_data_classification_in_AML.ipynb b/examples/notebooks/tabular_data_classification_in_AML.ipynb index c37d8aa755..a84b45784a 100644 --- a/examples/notebooks/tabular_data_classification_in_AML.ipynb +++ b/examples/notebooks/tabular_data_classification_in_AML.ipynb @@ -13,7 +13,7 @@ "source": [ "This simple example is to use NNI NAS 2.0(Retiarii) framework to search for the best neural architecture for tabular data classification task in Azure Machine Learning training platform.\n", "\n", - "The video demo is https://www.youtube.com/watch?v=PDVqBmm7Cro and https://www.bilibili.com/video/BV1oy4y1W7GF." + "The video demo is in [YouTube](https://www.youtube.com/watch?v=PDVqBmm7Cro) and [Bilibili](https://www.bilibili.com/video/BV1oy4y1W7GF)." 
] }, { @@ -32,48 +32,15 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ - "import os\n", - "import torch\n", - "import pandas as pd\n", - "\n", - "from sklearn.preprocessing import LabelEncoder\n", - "from torchvision.datasets.utils import download_url\n", - "\n", - "class TitanicDataset(torch.utils.data.Dataset):\n", - " def __init__(self, root: str, train: bool = True):\n", - " filename = 'train.csv' if train else 'eval.csv'\n", - " if not os.path.exists(os.path.join(root, filename)):\n", - " download_url(os.path.join(\n", - " 'https://storage.googleapis.com/tf-datasets/titanic/', filename), root, filename)\n", - "\n", - " df = pd.read_csv(os.path.join(root, filename))\n", - " object_colunmns = df.select_dtypes(include='object').columns.values\n", - " for idx in df.columns:\n", - " if idx in object_colunmns:\n", - " df[idx] = LabelEncoder().fit_transform(df[idx])\n", - " \n", - " self.x = torch.tensor(df.iloc[:, 1:].values)\n", - " self.y = torch.tensor(df.iloc[:, 0].values)\n", + "from utils import TitanicDataset\n", + "from nni.retiarii import serialize\n", "\n", - " def __len__(self):\n", - " return len(self.y)\n", - "\n", - " def __getitem__(self, idx):\n", - " return self.x[idx], self.y[idx]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "train_dataset = TitanicDataset('./data', train=True)\n", - "test_dataset = TitanicDataset('./data', train=False)" + "train_dataset = serialize(TitanicDataset, root='./data', train=True)\n", + "test_dataset = serialize(TitanicDataset, root='./data', train=False)" ] }, { @@ -154,7 +121,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -223,7 +190,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -248,9 +215,38 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True, used: False\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 09:56:10] INFO (lightning/MainThread) GPU available: True, used: False\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 09:56:10] INFO (lightning/MainThread) TPU available: None, using: 0 TPU cores\n" + ] + } + ], "source": [ "import nni.retiarii.evaluator.pytorch.lightning as pl\n", "\n", @@ -271,12 +267,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "After all the above are prepared, it is time to configure an experiment to do the model search. The basic experiment configuration is as follows: " + "After all the above are prepared, it is time to configure an experiment to do the model search. The basic experiment configuration is as follows, and advanced configuration reference on [this page](https://nni.readthedocs.io/en/stable/reference/experiment_config.html)." 
] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -289,7 +285,6 @@ "exp_config.trial_concurrency = 2\n", "exp_config.max_trial_number = 20\n", "exp_config.max_experiment_duration = '2h'\n", - "exp_config.trial_gpu_number = 1\n", "exp_config.nni_manager_ip = '' # your nni_manager_ip" ] }, @@ -297,7 +292,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Running NNI experiments on the AML(Azure Machine Learning) training service is also simple, you only need to configure the following additional fields:" + "Before running experiments on AML(Azure Machine Learning) training service, you need to set up corresponding environment(refer to [AML mode doc](https://nni.readthedocs.io/en/stable/TrainingService/AMLMode.html)) and configure the following additional fields:" ] }, { @@ -306,12 +301,22 @@ "metadata": {}, "outputs": [], "source": [ - "exp_config.training_service.use_active_gpu = True\n", + "# Authenticate to your Azure subscription from the CLI.\n", + "# If you have finished, please skip it.\n", + "!az login" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ "exp_config.training_service.subscription_id = '' # your subscription id\n", "exp_config.training_service.resource_group = '' # your resource group\n", "exp_config.training_service.workspace_name = '' # your workspace name\n", "exp_config.training_service.compute_target = '' # your compute target\n", - "exp_config.training_service.docker_image = '' # your docker image" + "exp_config.training_service.docker_image = 'msranni/nni:latest' # your docker image" ] }, { @@ -332,11 +337,640 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 09:56:54] INFO (nni.experiment/MainThread) Creating experiment, Experiment ID: 46den9qr\n", + "[2021-06-08 09:56:55] INFO (nni.experiment/MainThread) Connecting IPC pipe...\n", + "[2021-06-08 09:56:58] INFO (nni.experiment/MainThread) Starting web server...\n", + "[2021-06-08 09:57:00] INFO (nni.experiment/MainThread) Setting up...\n", + "[2021-06-08 09:57:05] INFO (nni.runtime.msg_dispatcher_base/Thread-8) Dispatcher started\n", + "[2021-06-08 09:57:05] INFO (nni.retiarii.experiment.pytorch/MainThread) Web UI URLs: http://127.0.0.1:8745\n", + "[2021-06-08 09:57:05] INFO (nni.retiarii.experiment.pytorch/MainThread) Start strategy...\n", + "[2021-06-08 09:57:05] INFO (nni.retiarii.strategy.tpe_strategy/MainThread) TPE strategy has been started.\n", + "[2021-06-08 09:57:05] INFO (hyperopt.tpe/MainThread) tpe_transform took 0.001999 seconds\n", + "[2021-06-08 09:57:05] INFO (hyperopt.tpe/MainThread) TPE using 0 trials\n", + "[2021-06-08 09:57:10] INFO (hyperopt.tpe/MainThread) tpe_transform took 0.002029 seconds\n", + "[2021-06-08 09:57:10] INFO (hyperopt.tpe/MainThread) TPE using 0 trials\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True, used: False\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:03:55] INFO (lightning/Thread-5) GPU available: True, used: False\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:03:55] INFO (lightning/Thread-5) TPU available: None, 
using: 0 TPU cores\n", + "[2021-06-08 10:03:56] INFO (hyperopt.tpe/MainThread) tpe_transform took 0.000000 seconds\n", + "[2021-06-08 10:03:56] INFO (hyperopt.tpe/MainThread) TPE using 1/1 trials with best loss 0.795455\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True, used: False\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:04:46] INFO (lightning/Thread-5) GPU available: True, used: False\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:04:46] INFO (lightning/Thread-5) TPU available: None, using: 0 TPU cores\n", + "[2021-06-08 10:04:46] INFO (hyperopt.tpe/MainThread) tpe_transform took 0.000000 seconds\n", + "[2021-06-08 10:04:46] INFO (hyperopt.tpe/MainThread) TPE using 2/2 trials with best loss 0.795455\n", + "[2021-06-08 10:04:50] WARNING (nni.runtime.msg_dispatcher_base/Thread-8) assessor queue length: 20\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True, used: False\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:04:51] INFO (lightning/Thread-5) GPU available: True, used: False\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:04:51] INFO (lightning/Thread-5) TPU available: None, using: 0 TPU cores\n", + "[2021-06-08 10:04:52] INFO (hyperopt.tpe/MainThread) tpe_transform took 0.000000 seconds\n", + "[2021-06-08 10:04:52] INFO (hyperopt.tpe/MainThread) TPE using 3/3 trials with best loss 0.795455\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True, used: False\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:05:46] INFO (lightning/Thread-5) GPU available: True, used: False\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:05:46] INFO (lightning/Thread-5) TPU available: None, using: 0 TPU cores\n", + "[2021-06-08 10:05:48] INFO (hyperopt.tpe/MainThread) tpe_transform took 0.002999 seconds\n", + "[2021-06-08 10:05:48] INFO (hyperopt.tpe/MainThread) TPE using 4/4 trials with best loss 0.791667\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True, used: False\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:05:56] INFO (lightning/Thread-5) GPU available: True, used: False\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:05:56] INFO (lightning/Thread-5) TPU available: None, using: 0 TPU cores\n", + "[2021-06-08 10:05:56] INFO (hyperopt.tpe/MainThread) tpe_transform took 0.000000 seconds\n", + "[2021-06-08 10:05:56] INFO (hyperopt.tpe/MainThread) TPE using 5/5 trials with best loss 0.791667\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True, used: False\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 
10:06:26] INFO (lightning/Thread-5) GPU available: True, used: False\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:06:26] INFO (lightning/Thread-5) TPU available: None, using: 0 TPU cores\n", + "[2021-06-08 10:06:27] INFO (hyperopt.tpe/MainThread) tpe_transform took 0.004991 seconds\n", + "[2021-06-08 10:06:27] INFO (hyperopt.tpe/MainThread) TPE using 6/6 trials with best loss 0.791667\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True, used: False\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:07:06] INFO (lightning/Thread-5) GPU available: True, used: False\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:07:06] INFO (lightning/Thread-5) TPU available: None, using: 0 TPU cores\n", + "[2021-06-08 10:07:07] INFO (hyperopt.tpe/MainThread) tpe_transform took 0.006043 seconds\n", + "[2021-06-08 10:07:07] INFO (hyperopt.tpe/MainThread) TPE using 7/7 trials with best loss 0.784091\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True, used: False\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:07:56] INFO (lightning/Thread-5) GPU available: True, used: False\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:07:56] INFO (lightning/Thread-5) TPU available: None, using: 0 TPU cores\n", + "[2021-06-08 10:07:57] INFO (hyperopt.tpe/MainThread) tpe_transform took 0.006004 seconds\n", + "[2021-06-08 10:07:57] INFO (hyperopt.tpe/MainThread) TPE using 8/8 trials with best loss 0.731061\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True, used: False\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:08:01] INFO (lightning/Thread-5) GPU available: True, used: False\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:08:01] INFO (lightning/Thread-5) TPU available: None, using: 0 TPU cores\n", + "[2021-06-08 10:08:01] INFO (hyperopt.tpe/MainThread) tpe_transform took 0.005000 seconds\n", + "[2021-06-08 10:08:01] INFO (hyperopt.tpe/MainThread) TPE using 9/9 trials with best loss 0.731061\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True, used: False\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:08:56] INFO (lightning/Thread-5) GPU available: True, used: False\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:08:56] INFO (lightning/Thread-5) TPU available: None, using: 0 TPU cores\n", + "[2021-06-08 10:08:58] INFO (hyperopt.tpe/MainThread) tpe_transform took 0.004962 seconds\n", + "[2021-06-08 10:08:58] INFO (hyperopt.tpe/MainThread) TPE using 10/10 trials 
with best loss 0.731061\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True, used: False\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:09:01] INFO (lightning/Thread-5) GPU available: True, used: False\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:09:01] INFO (lightning/Thread-5) TPU available: None, using: 0 TPU cores\n", + "[2021-06-08 10:09:03] INFO (hyperopt.tpe/MainThread) tpe_transform took 0.003043 seconds\n", + "[2021-06-08 10:09:03] INFO (hyperopt.tpe/MainThread) TPE using 11/11 trials with best loss 0.731061\n", + "[2021-06-08 10:10:24] WARNING (nni.runtime.msg_dispatcher_base/Thread-8) assessor queue length: 20\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True, used: False\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:10:27] INFO (lightning/Thread-5) GPU available: True, used: False\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:10:27] INFO (lightning/Thread-5) TPU available: None, using: 0 TPU cores\n", + "[2021-06-08 10:10:28] INFO (hyperopt.tpe/MainThread) tpe_transform took 0.002005 seconds\n", + "[2021-06-08 10:10:28] INFO (hyperopt.tpe/MainThread) TPE using 12/12 trials with best loss 0.731061\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True, used: False\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:10:52] INFO (lightning/Thread-5) GPU available: True, used: False\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:10:52] INFO (lightning/Thread-5) TPU available: None, using: 0 TPU cores\n", + "[2021-06-08 10:10:53] INFO (hyperopt.tpe/MainThread) tpe_transform took 0.122046 seconds\n", + "[2021-06-08 10:10:53] INFO (hyperopt.tpe/MainThread) TPE using 13/13 trials with best loss 0.731061\n", + "[2021-06-08 10:14:52] WARNING (nni.runtime.msg_dispatcher_base/Thread-8) assessor queue length: 20\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True, used: False\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:14:52] INFO (lightning/Thread-5) GPU available: True, used: False\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:14:52] INFO (lightning/Thread-5) TPU available: None, using: 0 TPU cores\n", + "[2021-06-08 10:14:53] INFO (hyperopt.tpe/MainThread) tpe_transform took 0.002038 seconds\n", + "[2021-06-08 10:14:53] INFO (hyperopt.tpe/MainThread) TPE using 14/14 trials with best loss 0.731061\n", + "[2021-06-08 10:14:56] WARNING (nni.runtime.msg_dispatcher_base/Thread-8) assessor queue length: 20\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True, used: False\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": 
[ + "[2021-06-08 10:14:57] INFO (lightning/Thread-5) GPU available: True, used: False\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:14:57] INFO (lightning/Thread-5) TPU available: None, using: 0 TPU cores\n", + "[2021-06-08 10:14:58] INFO (hyperopt.tpe/MainThread) tpe_transform took 0.005870 seconds\n", + "[2021-06-08 10:14:58] INFO (hyperopt.tpe/MainThread) TPE using 15/15 trials with best loss 0.731061\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True, used: False\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:16:07] INFO (lightning/Thread-5) GPU available: True, used: False\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:16:07] INFO (lightning/Thread-5) TPU available: None, using: 0 TPU cores\n", + "[2021-06-08 10:16:08] INFO (hyperopt.tpe/MainThread) tpe_transform took 0.004999 seconds\n", + "[2021-06-08 10:16:08] INFO (hyperopt.tpe/MainThread) TPE using 16/16 trials with best loss 0.712121\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True, used: False\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:16:48] INFO (lightning/Thread-5) GPU available: True, used: False\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:16:48] INFO (lightning/Thread-5) TPU available: None, using: 0 TPU cores\n", + "[2021-06-08 10:16:48] INFO (hyperopt.tpe/MainThread) tpe_transform took 0.002000 seconds\n", + "[2021-06-08 10:16:48] INFO (hyperopt.tpe/MainThread) TPE using 17/17 trials with best loss 0.712121\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True, used: False\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:16:53] INFO (lightning/Thread-5) GPU available: True, used: False\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:16:53] INFO (lightning/Thread-5) TPU available: None, using: 0 TPU cores\n", + "[2021-06-08 10:16:55] INFO (hyperopt.tpe/MainThread) tpe_transform took 0.002010 seconds\n", + "[2021-06-08 10:16:55] INFO (hyperopt.tpe/MainThread) TPE using 18/18 trials with best loss 0.712121\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True, used: False\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:17:43] INFO (lightning/Thread-5) GPU available: True, used: False\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:17:43] INFO (lightning/Thread-5) TPU available: None, using: 0 TPU cores\n", + "[2021-06-08 10:17:44] INFO (hyperopt.tpe/MainThread) tpe_transform took 0.006001 seconds\n", + "[2021-06-08 10:17:44] INFO (hyperopt.tpe/MainThread) 
TPE using 19/19 trials with best loss 0.712121\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True, used: False\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:18:03] INFO (lightning/Thread-5) GPU available: True, used: False\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "TPU available: None, using: 0 TPU cores\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-06-08 10:18:03] INFO (lightning/Thread-5) TPU available: None, using: 0 TPU cores\n", + "[2021-06-08 10:18:04] INFO (hyperopt.tpe/MainThread) tpe_transform took 0.002009 seconds\n", + "[2021-06-08 10:18:04] INFO (hyperopt.tpe/MainThread) TPE using 20/20 trials with best loss 0.712121\n", + "[2021-06-08 10:18:12] INFO (nni.retiarii.experiment.pytorch/Thread-9) Stopping experiment, please wait...\n", + "[2021-06-08 10:18:14] INFO (nni.runtime.msg_dispatcher_base/Thread-8) Dispatcher exiting...\n", + "[2021-06-08 10:18:14] INFO (nni.retiarii.experiment.pytorch/MainThread) Strategy exit\n", + "[2021-06-08 10:18:14] INFO (nni.retiarii.experiment.pytorch/MainThread) Waiting for experiment to become DONE (you can ctrl+c if there is no running trial jobs)...\n", + "[2021-06-08 10:18:15] INFO (nni.retiarii.experiment.pytorch/Thread-9) Experiment stopped\n", + "[2021-06-08 10:18:16] INFO (nni.runtime.msg_dispatcher_base/Thread-8) Dispatcher terminiated\n" + ] + } + ], "source": [ - "exp.run(exp_config, 8081 + random.randint(0, 100))" + "exp.run(exp_config, 8745)" ] }, { @@ -355,9 +989,51 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Final model:\n", + "import torch\n", + "import torch.nn as nn\n", + "import torch.nn.functional as F\n", + "import torch.optim as optim\n", + "\n", + "import nni.retiarii.nn.pytorch\n", + "\n", + "import torch\n", + "\n", + "\n", + "class _model(nn.Module):\n", + " def __init__(self):\n", + " super().__init__()\n", + " self.__fc1 = torch.nn.modules.linear.Linear(in_features=9, out_features=512)\n", + " self.__bn1 = torch.nn.modules.batchnorm.BatchNorm1d(num_features=512)\n", + " self.__dropout1 = torch.nn.modules.dropout.Dropout(p=0.0)\n", + " self.__fc2 = torch.nn.modules.linear.Linear(in_features=512, out_features=128)\n", + " self.__bn2 = torch.nn.modules.batchnorm.BatchNorm1d(num_features=128)\n", + " self.__dropout2 = torch.nn.modules.dropout.Dropout(p=0.25)\n", + " self.__fc3 = torch.nn.modules.linear.Linear(in_features=128, out_features=2)\n", + "\n", + " def forward(self, x__1):\n", + " __Constant3 = False\n", + " __fc1 = self.__fc1(x__1)\n", + " __bn1 = self.__bn1(__fc1)\n", + " __relu7 = F.relu(__bn1, __Constant3)\n", + " __dropout1 = self.__dropout1(__relu7)\n", + " __fc2 = self.__fc2(__dropout1)\n", + " __bn2 = self.__bn2(__fc2)\n", + " __relu11 = F.relu(__bn2, __Constant3)\n", + " __dropout2 = self.__dropout2(__relu11)\n", + " __fc3 = self.__fc3(__dropout2)\n", + " __sigmoid13 = F.sigmoid(__fc3)\n", + " return __sigmoid13\n" + ] + } + ], "source": [ "print('Final model:')\n", "for model_code in exp.export_top_models():\n", @@ -388,7 +1064,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.8" + "version": "3.8.5" } }, "nbformat": 4, diff --git a/examples/notebooks/utils.py b/examples/notebooks/utils.py new file mode 100644 index 0000000000..64efb24471 
--- /dev/null
+++ b/examples/notebooks/utils.py
@@ -0,0 +1,36 @@
+import os
+import torch
+import pandas as pd
+
+from sklearn.preprocessing import LabelEncoder
+from torchvision.datasets.utils import download_url
+
+class TitanicDataset(torch.utils.data.Dataset):
+    """Titanic tabular dataset: downloads the CSV on first use and label-encodes string columns."""
+    def __init__(self, root: str, train: bool = True):
+        filename = 'train.csv' if train else 'eval.csv'
+        if not os.path.exists(os.path.join(root, filename)):
+            download_url('https://storage.googleapis.com/tf-datasets/titanic/' + filename, root, filename)
+
+        df = pd.read_csv(os.path.join(root, filename))
+        # Encode string-valued columns (e.g. sex, class) as integer labels.
+        object_columns = df.select_dtypes(include='object').columns.values
+        for idx in df.columns:
+            if idx in object_columns:
+                df[idx] = LabelEncoder().fit_transform(df[idx])
+
+        # The first column is the label (survived); the remaining columns are features.
+        self.x = df.iloc[:, 1:].values
+        self.y = df.iloc[:, 0].values
+
+    def __len__(self):
+        return len(self.y)
+
+    def __getitem__(self, idx):
+        return torch.Tensor(self.x[idx]), self.y[idx]
+
+def accuracy(output, target):
+    # Top-1 accuracy for a batch, reported as a metric dict.
+    batch_size = target.size(0)
+    _, predicted = torch.max(output.data, 1)
+    return {"acc1": (predicted == target).sum().item() / batch_size}
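For reference, below is a minimal usage sketch (not part of the change above) showing how the helpers in examples/notebooks/utils.py could be exercised on their own with plain PyTorch. The './data' directory, the batch size, and the stand-in nn.Linear(9, 2) model (nine encoded Titanic feature columns, two classes) are illustrative assumptions, not code from the notebook.

import torch
from torch.utils.data import DataLoader

from utils import TitanicDataset, accuracy

# Downloads eval.csv into ./data on first use (the directory is an assumed choice).
val_set = TitanicDataset('./data', train=False)
val_loader = DataLoader(val_set, batch_size=32, shuffle=False)

# Stand-in classifier: 9 numeric features in, 2 classes out.
model = torch.nn.Linear(9, 2)
model.eval()

correct, total = 0, 0
with torch.no_grad():
    for x, y in val_loader:
        out = model(x)
        # accuracy() returns {"acc1": fraction correct in this batch}.
        correct += accuracy(out, y)['acc1'] * y.size(0)
        total += y.size(0)
print('validation accuracy: {:.3f}'.format(correct / total))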