This repository has been archived by the owner on Sep 18, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* refactor compression sdk * bugfix * bugfix * update ut
- Loading branch information
Showing
23 changed files
with
584 additions
and
785 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,4 +6,4 @@ AGPruner: | |
frequency: 1 | ||
initial_sparsity: 0.05 | ||
final_sparsity: 0.8 | ||
support_type: 'default' | ||
op_type: 'default' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
from .compressor import LayerInfo, Compressor, Pruner, Quantizer | ||
from .builtin_pruners import * | ||
from .builtin_quantizers import * |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
74 changes: 74 additions & 0 deletions
74
src/sdk/pynni/nni/compression/tensorflow/builtin_quantizers.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
import logging | ||
import tensorflow as tf | ||
from .compressor import Quantizer | ||
|
||
__all__ = [ 'NaiveQuantizer', 'QAT_Quantizer', 'DoReFaQuantizer' ] | ||
|
||
_logger = logging.getLogger(__name__) | ||
|
||
|
||
class NaiveQuantizer(Quantizer):
    """
    Quantize weights to 8 bits with a symmetric linear scale.

    A running per-layer maximum scale is kept so the quantization
    range never shrinks between successive calls.
    """
    def __init__(self, config_list):
        super().__init__(config_list)
        # layer name -> largest quantization scale observed so far
        self.layer_scale = {}

    def quantize_weight(self, layer, weight, config):
        # Candidate scale from the current weight range (int8 max = 127).
        candidate = tf.reduce_max(tf.abs(weight)) / 127
        previous = self.layer_scale.get(layer.name, tf.constant(0.0))
        scale = tf.maximum(previous, candidate)
        self.layer_scale[layer.name] = scale
        original_dtype = weight.dtype
        # Round-trip through int8 to simulate 8-bit precision loss.
        quantized = tf.cast(weight / scale, tf.int8)
        return tf.cast(quantized, original_dtype) * scale
|
||
|
||
class QAT_Quantizer(Quantizer):
    """
    Quantization-aware-training quantizer, as defined in:
    Jacob et al., Quantization and Training of Neural Networks for
    Efficient Integer-Arithmetic-Only Inference
    http://openaccess.thecvf.com/content_cvpr_2018/papers/Jacob_Quantization_and_Training_CVPR_2018_paper.pdf

    (The original docstring mislabeled this as the "DoReFa scheme";
    the cited paper and the algorithm are Jacob et al.'s QAT.)
    """
    def __init__(self, config_list):
        """
        Configure Args:
            q_bits: number of bits used to represent each quantized weight
        """
        super().__init__(config_list)

    def quantize_weight(self, layer, weight, config):
        # [a, b] is the dynamic range of the weights; gradients are
        # stopped so the range itself is not trained.
        a = tf.stop_gradient(tf.reduce_min(weight))
        b = tf.stop_gradient(tf.reduce_max(weight))
        n = tf.cast(2 ** config['q_bits'], tf.float32)
        # Step size between adjacent quantization levels. The original
        # `b - a / (n - 1)` dropped the parentheses and computed
        # b - (a / (n - 1)) instead of (b - a) / (n - 1).
        scale = (b - a) / (n - 1)

        # Use gradient_override_map to change Round to Identity for the
        # gradient (straight-through estimator).
        with tf.get_default_graph().gradient_override_map({'Round': 'Identity'}):
            qw = tf.round((weight - a) / scale) * scale + a

        return qw
|
||
|
||
class DoReFaQuantizer(Quantizer):
    """
    Quantizer using the DoReFa scheme, as defined in:
    Zhou et al., DoReFa-Net: Training Low Bitwidth Convolutional Neural
    Networks with Low Bitwidth Gradients
    (https://arxiv.org/abs/1606.06160)
    """
    def __init__(self, config_list):
        """
        Configure Args:
            q_bits: number of bits used to represent each quantized weight
        """
        super().__init__(config_list)

    def quantize_weight(self, layer, weight, config):
        a = tf.math.tanh(weight)
        # Normalize tanh(w) into [0, 1]. The DoReFa paper divides by
        # max(|tanh(w)|), so the denominator must use `a`; the original
        # used tf.abs(weight), i.e. the raw (un-tanh'd) weights.
        b = a / (2 * tf.reduce_max(tf.abs(a))) + 0.5

        scale = pow(2, config['q_bits'] - 1)
        # Use gradient_override_map to change Round to Identity for the
        # gradient (straight-through estimator).
        with tf.get_default_graph().gradient_override_map({'Round': 'Identity'}):
            qw = tf.round(b * scale) / scale
        # Map the [0, 1] quantized values back to [-1, 1].
        r_qw = 2 * qw - 1
        return r_qw
Oops, something went wrong.