
fix pruner bugs and add model compression README #1624

Merged
merged 14 commits on Oct 21, 2019
117 changes: 116 additions & 1 deletion docs/en_US/Compressor/AutoCompression.md
@@ -1,3 +1,118 @@
# Automatic Model Compression on NNI

TBD.
It is convenient to implement automatic model compression by combining NNI compression with NNI tuners.

## First, model compression with NNI

You can easily compress a model with NNI compression. Taking pruning as an example, you can prune a pretrained model with LevelPruner like this:

```python
from nni.compression.torch import LevelPruner
config_list = [{ 'sparsity': 0.8, 'op_types': 'default' }]
pruner = LevelPruner(config_list)
pruner(model)
```

```{ 'sparsity': 0.8, 'op_types': 'default' }``` means that **all layers with weights will be compressed with the same 0.8 sparsity**. When ```pruner(model)``` is called, the model is compressed with masks; after that you can fine-tune the model as usual, and the **pruned (masked) weights will not be updated**.
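
For instance, a typical flow looks like the minimal sketch below; `train_one_epoch`, `evaluate`, `model`, and `optimizer` are placeholders for your own objects and routines, not part of the NNI API.

```python
from nni.compression.torch import LevelPruner

config_list = [{ 'sparsity': 0.8, 'op_types': 'default' }]
pruner = LevelPruner(config_list)
pruner(model)  # masks are injected into the model here

# Fine-tune as usual; the masked (pruned) weights stay zero.
for epoch in range(10):
    train_one_epoch(model, optimizer)   # placeholder: your training loop
    accuracy = evaluate(model)          # placeholder: your evaluation routine
```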

## Then, make this automatic

The previous example manually chose LevelPruner and pruned all layers with the same sparsity. This is obviously sub-optimal because different layers may have different amounts of redundancy. Layer sparsity should be carefully tuned to minimize model performance degradation, and this can be done with NNI tuners.

The first thing we need to do is design a search space. Here we use a nested search space that both chooses the pruning algorithm and optimizes the per-layer sparsity.

```json
{
    "prune_method": {
        "_type": "choice",
        "_value": [
            {
                "_name": "agp",
                "conv0_sparsity": {
                    "_type": "uniform",
                    "_value": [
                        0.1,
                        0.9
                    ]
                },
                "conv1_sparsity": {
                    "_type": "uniform",
                    "_value": [
                        0.1,
                        0.9
                    ]
                }
            },
            {
                "_name": "level",
                "conv0_sparsity": {
                    "_type": "uniform",
                    "_value": [
                        0.1,
                        0.9
                    ]
                },
                "conv1_sparsity": {
                    "_type": "uniform",
                    "_value": [
                        0.01,
                        0.9
                    ]
                }
            }
        ]
    }
}
```
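
For reference, with this nested search space each trial receives a sampled parameter dict roughly shaped as follows (the concrete values below are illustrative only, not actual tuner output):

```python
# Illustrative only: one possible sample drawn from the search space above.
params = {
    'prune_method': {
        '_name': 'agp',          # which branch of the "choice" was taken
        'conv0_sparsity': 0.55,  # sampled from uniform(0.1, 0.9)
        'conv1_sparsity': 0.32   # sampled from uniform(0.1, 0.9)
    }
}
```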

Then we need to modify a few lines of our code:

```python
import nni
from nni.compression.torch import *

params = nni.get_parameters()
conv0_sparsity = params['prune_method']['conv0_sparsity']
conv1_sparsity = params['prune_method']['conv1_sparsity']
# these raw sparsities should be scaled if you need the total sparsity constrained
config_list_level = [{ 'sparsity': conv0_sparsity, 'op_name': 'conv0' },
                     { 'sparsity': conv1_sparsity, 'op_name': 'conv1' }]
config_list_agp = [{'initial_sparsity': 0, 'final_sparsity': conv0_sparsity,
                    'start_epoch': 0, 'end_epoch': 3,
                    'frequency': 1, 'op_name': 'conv0' },
                   {'initial_sparsity': 0, 'final_sparsity': conv1_sparsity,
                    'start_epoch': 0, 'end_epoch': 3,
                    'frequency': 1, 'op_name': 'conv1' }]
PRUNERS = {'level': LevelPruner(config_list_level), 'agp': AGP_Pruner(config_list_agp)}
pruner = PRUNERS[params['prune_method']['_name']]  # look up the chosen pruner
pruner(model)
... # fine tuning
acc = evaluate(model) # evaluation
nni.report_final_result(acc)
```
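
The scaling comment above refers to the fact that the tuner samples each layer's sparsity independently; if you want the overall, parameter-weighted sparsity to match a fixed budget, you can rescale the sampled values first. A minimal sketch, assuming you know each layer's parameter count (the counts below are hypothetical):

```python
def scale_sparsities(raw_sparsities, param_counts, target_total_sparsity):
    """Rescale per-layer sparsities so that their parameter-weighted average
    equals target_total_sparsity (results are clipped to [0, 0.99])."""
    total_params = sum(param_counts)
    current = sum(s * n for s, n in zip(raw_sparsities, param_counts)) / total_params
    factor = target_total_sparsity / current
    return [min(max(s * factor, 0.0), 0.99) for s in raw_sparsities]

# e.g. rescale conv0/conv1 sparsities to an overall 0.8 sparsity budget
conv0_sparsity, conv1_sparsity = scale_sparsities(
    [conv0_sparsity, conv1_sparsity],
    [500, 25000],   # hypothetical parameter counts of conv0 and conv1
    0.8)
```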

Last, define our experiment so NNI can automatically tune the pruning method together with the layer sparsities:

```yaml
authorName: default
experimentName: Auto_Compression
trialConcurrency: 2
maxExecDuration: 100h
maxTrialNum: 500
#choice: local, remote, pai
trainingServicePlatform: local
#choice: true, false
useAnnotation: false
searchSpacePath: search_space.json
tuner:
  #choice: TPE, Random, Anneal...
  builtinTunerName: TPE
  classArgs:
    #choice: maximize, minimize
    optimize_mode: maximize
trial:
  command: bash run_prune.sh
  codeDir: .
  gpuNum: 1
```

1 change: 0 additions & 1 deletion docs/en_US/Compressor/Overview.md
@@ -8,7 +8,6 @@ We have provided two naive compression algorithms and four popular ones for users
|---|---|
Contributor

“We have provided two naive compression algorithms and four popular ones for users, including three pruning algorithms and three quantization algorithms:” this line should also be updated

| [Level Pruner](./Pruner.md#level-pruner) | Prunes weights to the specified sparsity ratio based on the absolute values of the weights |
| [AGP Pruner](./Pruner.md#agp-pruner) | Automated gradual pruning (To prune, or not to prune: exploring the efficacy of pruning for model compression) [Reference Paper](https://arxiv.org/abs/1710.01878)|
| [Sensitivity Pruner](./Pruner.md#sensitivity-pruner) | Learning both Weights and Connections for Efficient Neural Networks. [Reference Paper](https://arxiv.org/abs/1506.02626)|
| [Naive Quantizer](./Quantizer.md#naive-quantizer) | Quantize weights to default 8 bits |
| [QAT Quantizer](./Quantizer.md#qat-quantizer) | Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only Inference. [Reference Paper](http://openaccess.thecvf.com/content_cvpr_2018/papers/Jacob_Quantization_and_Training_CVPR_2018_paper.pdf)|
| [DoReFa Quantizer](./Quantizer.md#dorefa-quantizer) | DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients. [Reference Paper](https://arxiv.org/abs/1606.06160)|
42 changes: 2 additions & 40 deletions docs/en_US/Compressor/Pruner.md
@@ -48,7 +48,7 @@ from nni.compression.tensorflow import AGP_Pruner
config_list = [{
'initial_sparsity': 0,
'final_sparsity': 0.8,
'start_epoch': 1,
Contributor

what is the meaning of start_epoch=0, end_epoch=10?

Contributor Author

@QuanluZhang start_epoch=0, end_epoch=10 means pruning starts from epoch 0 and ends at epoch 10. The previous default was start_epoch=1 in the algorithm, but we usually start from 0, so I modified them all.

'start_epoch': 0,
'end_epoch': 10,
'frequency': 1,
'op_types': 'default'
@@ -62,7 +62,7 @@ from nni.compression.torch import AGP_Pruner
config_list = [{
'initial_sparsity': 0,
'final_sparsity': 0.8,
'start_epoch': 1,
'start_epoch': 0,
'end_epoch': 10,
'frequency': 1,
'op_types': 'default'
@@ -92,41 +92,3 @@ You can view example for more information

***

## Sensitivity Pruner
In [Learning both Weights and Connections for Efficient Neural Networks](https://arxiv.org/abs/1506.02626), Song Han et al. provide an algorithm to find the sensitivity of each layer and set a pruning threshold for each layer.

>We used the sensitivity results to find each layer’s threshold: for example, the smallest threshold was applied to the most sensitive layer, which is the first convolutional layer... The pruning threshold is chosen as a quality parameter multiplied by the standard deviation of a layer’s weights
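
The quoted rule can be sketched as follows; this is a simplified, hedged illustration in PyTorch (not NNI's actual implementation), where `quality_parameter` is a user-chosen scalar and `conv_layer` stands for any layer of your model:

```python
import torch

def sensitivity_mask(weight, quality_parameter):
    # threshold = quality parameter * standard deviation of the layer's weights
    threshold = quality_parameter * weight.std()
    # keep only weights whose magnitude exceeds the per-layer threshold
    return (weight.abs() > threshold).float()

mask = sensitivity_mask(conv_layer.weight.data, quality_parameter=0.5)
pruned_weight = conv_layer.weight.data * mask
```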

### Usage
You can prune weights step by step and reach a target sparsity with Sensitivity Pruner using the code below.

Tensorflow code
```python
from nni.compression.tensorflow import SensitivityPruner
config_list = [{ 'sparsity':0.8, 'op_types': 'default' }]
pruner = SensitivityPruner(config_list)
pruner(tf.get_default_graph())
```
PyTorch code
```python
from nni.compression.torch import SensitivityPruner
config_list = [{ 'sparsity':0.8, 'op_types': 'default' }]
pruner = SensitivityPruner(config_list)
pruner(model)
```
Like AGP Pruner, you should update the mask information every epoch by adding the code below.

Tensorflow code
```python
pruner.update_epoch(epoch, sess)
```
PyTorch code
```python
pruner.update_epoch(epoch)
```
You can view the example for more information

#### User configuration for Sensitivity Pruner
* **sparsity:** This specifies the target sparsity for the operations to be compressed

***
48 changes: 48 additions & 0 deletions examples/model_compress/README.md
@@ -0,0 +1,48 @@
# Run model compression examples

You can run these examples easily. Take torch pruning for example:

```bash
python main_torch_pruner.py
```

This example uses AGP Pruner. Initializing a pruner requires a user-provided configuration, which can be supplied in two ways:

- By reading ```configure_example.yaml```, which keeps the code clean when your configuration is complicated (see the sketch below)
- By configuring directly in your code
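
For the first option, a minimal sketch of loading the configuration might look like the following; it assumes the ```AGPruner.config``` layout shown in ```configure_example.yaml``` later in this PR and that PyYAML is available:

```python
import yaml
from nni.compression.torch import AGP_Pruner

with open('configure_example.yaml') as f:
    raw = yaml.safe_load(f)

configure_list = raw['AGPruner']['config']   # a list of per-layer config dicts
pruner = AGP_Pruner(configure_list)
```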

In our example, we simply configure model compression in our code like this:

```python
configure_list = [{
'initial_sparsity': 0,
'final_sparsity': 0.8,
'start_epoch': 0,
'end_epoch': 10,
'frequency': 1,
'op_type': 'default'
}]
pruner = AGP_Pruner(configure_list)
```

When ```pruner(model)``` is called, your model is injected with masks as embedded operations. For example, where a layer takes a weight as input, we insert an operation between the weight and the layer; this operation takes the weight as input and outputs the weight with the mask applied. Thus, the masks take effect whenever the computation goes through these operations, and you can fine-tune your model **without** any modifications.
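
Conceptually, the injected operation behaves like the simplified sketch below; this is only an illustration of the idea, not NNI's actual implementation. The fine-tuning loop that follows then works on the masked model without further changes.

```python
import torch

class MaskedWeight(torch.nn.Module):
    """Simplified illustration: wraps a weight tensor and applies a fixed binary
    mask every time the weight is used, so pruned entries stay zero."""
    def __init__(self, weight, mask):
        super().__init__()
        self.weight = weight                # the layer's original weight tensor
        self.register_buffer('mask', mask)  # binary mask, 0 = pruned

    def forward(self):
        return self.weight * self.mask      # masked weight used by the layer
```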

```python
for epoch in range(10):
# update_epoch is for pruner to be aware of epochs, so that it could adjust masks during training.
pruner.update_epoch(epoch)
print('# Epoch {} #'.format(epoch))
train(model, device, train_loader, optimizer)
test(model, device, test_loader)
```

When fine-tuning finishes, the pruned weights are all masked, and you can retrieve the masks like this:

```python
masks = pruner.mask_list
layer_name = xxx
mask = masks[layer_name]
```
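
For example, you can use the masks to check the sparsity that was actually reached; the small sketch below assumes ```pruner.mask_list``` behaves like a dict mapping layer names to mask tensors, as in the snippet above:

```python
# Sketch: report the fraction of zeroed weights per masked layer.
for layer_name, mask in pruner.mask_list.items():
    sparsity = 1.0 - mask.sum().item() / mask.numel()
    print('{}: sparsity {:.2f}'.format(layer_name, sparsity))
```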



2 changes: 1 addition & 1 deletion examples/model_compress/configure_example.yaml
@@ -1,7 +1,7 @@
AGPruner:
config:
-
start_epoch: 1
start_epoch: 0
end_epoch: 10
frequency: 1
initial_sparsity: 0.05
Expand Down
4 changes: 2 additions & 2 deletions examples/model_compress/main_tf_pruner.py
@@ -79,8 +79,8 @@ def main():

model = Mnist()

'''you can change this to SensitivityPruner to implement it
pruner = SensitivityPruner(configure_list)
'''you can change this to LevelPruner to implement it
pruner = LevelPruner(configure_list)
'''
configure_list = [{
'initial_sparsity': 0,
41 changes: 21 additions & 20 deletions examples/model_compress/main_torch_pruner.py
@@ -20,7 +20,7 @@ def forward(self, x):
x = x.view(-1, 4 * 4 * 50)
x = F.relu(self.fc1(x))
x = self.fc2(x)
return F.log_softmax(x, dim = 1)
return F.log_softmax(x, dim=1)


def train(model, device, train_loader, optimizer):
@@ -35,6 +35,7 @@ def train(model, device, train_loader, optimizer):
if batch_idx % 100 == 0:
print('{:2.0f}% Loss {}'.format(100 * batch_idx / len(train_loader), loss.item()))


def test(model, device, test_loader):
model.eval()
test_loss = 0
@@ -43,52 +44,52 @@ def test(model, device, test_loader):
for data, target in test_loader:
data, target = data.to(device), target.to(device)
output = model(data)
test_loss += F.nll_loss(output, target, reduction = 'sum').item()
pred = output.argmax(dim = 1, keepdim = True)
test_loss += F.nll_loss(output, target, reduction='sum').item()
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
test_loss /= len(test_loader.dataset)

print('Loss: {} Accuracy: {}%)\n'.format(
test_loss, 100 * correct / len(test_loader.dataset)))


def main():
torch.manual_seed(0)
device = torch.device('cpu')

trans = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
train_loader = torch.utils.data.DataLoader(
datasets.MNIST('data', train = True, download = True, transform = trans),
batch_size = 64, shuffle = True)
datasets.MNIST('data', train=True, download=True, transform=trans),
batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(
datasets.MNIST('data', train = False, transform = trans),
batch_size = 1000, shuffle = True)
datasets.MNIST('data', train=False, transform=trans),
batch_size=1000, shuffle=True)

model = Mnist()
'''you can change this to SensitivityPruner to implement it
pruner = SensitivityPruner(configure_list)

'''you can change this to LevelPruner to implement it
pruner = LevelPruner(configure_list)
'''
configure_list = [{
'initial_sparsity': 0,
'final_sparsity': 0.8,
'start_epoch': 1,
'end_epoch': 10,
'frequency': 1,
'op_type': 'default'
}]
'initial_sparsity': 0,
'final_sparsity': 0.8,
'start_epoch': 0,
'end_epoch': 10,
'frequency': 1,
'op_type': 'default'
}]

pruner = AGP_Pruner(configure_list)
pruner(model)
# you can also use the compress(model) method
# like this: pruner.compress(model)

optimizer = torch.optim.SGD(model.parameters(), lr = 0.01, momentum = 0.5)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
for epoch in range(10):
pruner.update_epoch(epoch)
print('# Epoch {} #'.format(epoch))
train(model, device, train_loader, optimizer)
test(model, device, test_loader)

pruner.update_epoch(epoch)


if __name__ == '__main__':