From ab184de943a78fa5c6cc26facf567c4b887b03e5 Mon Sep 17 00:00:00 2001 From: Antonio Date: Thu, 29 Nov 2018 11:53:52 +0900 Subject: [PATCH 01/45] [WIP] Added pattern matching and new optimization passes --- dlk/python/dlk/core/graph.py | 4 +- dlk/python/dlk/core/graph_pattern_matching.py | 119 +++++++++ dlk/python/dlk/core/operators.py | 72 ++++++ dlk/python/dlk/core/optimizer.py | 3 +- dlk/python/dlk/plugins/tf.py | 29 ++- dlk/python/dlk/scripts/generate_project.py | 230 ++++++++++++++++++ dlk/python/dlk/templates/Makefile.tpl | 8 +- 7 files changed, 447 insertions(+), 18 deletions(-) create mode 100644 dlk/python/dlk/core/graph_pattern_matching.py diff --git a/dlk/python/dlk/core/graph.py b/dlk/python/dlk/core/graph.py index a76cf3367..0ff8fc0c4 100644 --- a/dlk/python/dlk/core/graph.py +++ b/dlk/python/dlk/core/graph.py @@ -93,9 +93,9 @@ def remove_op(self, op: Operator) -> None: del self.__op_type_list[t][i] @property - def operartors(self) -> List[Operator]: + def operators(self) -> List[Operator]: """List up all operators in this graph.""" - return list(self.__ops.keys()) + return list(self.__ops.values()) def get_inputs(self) -> List[Operator]: return list(self.__op_type_list['Input']) diff --git a/dlk/python/dlk/core/graph_pattern_matching.py b/dlk/python/dlk/core/graph_pattern_matching.py new file mode 100644 index 000000000..8ae3840a2 --- /dev/null +++ b/dlk/python/dlk/core/graph_pattern_matching.py @@ -0,0 +1,119 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 The Blueoil Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= +"""Graph pattern matching module.""" + +from core.operators import Operator +from core.graph import Graph + + +class Pattern: + def __init__(self, op=str(), inputs=list()): + self.op = op + self.inputs = inputs + + +class NodeMatch: + def __init__(self): + self.node = None + self.inputs = list() + + +def sort_graph(graph, exec_list): + for node in graph.operators: + node.visited = False + + input_nodes = list() + for node in graph.operators: + input_nodes += [n.name for n in node.input_nodes] + + output_nodes = list() + for node in graph.operators: + if node not in input_nodes: + output_nodes.append(node) + + for node in output_nodes: + top_order(node, exec_list) + + +def top_order(output_node, exec_list): + if output_node.visited: + return + for input_node in output_node.input_nodes: + top_order(input_node, exec_list) + + exec_list.append(output_node) + output_node.visited = True + + +def match_to_execution_list(match, execution_list): + for input_node in match.inputs: + match_to_execution_list(input_node, execution_list) + execution_list.append(match.node) + + +class GraphMatcher: + def __init__(self, input_graph=Graph()): + self.graph_node_list = list() + sort_graph(input_graph, self.graph_node_list) + + self._node_map = {node.name: node for node in self.graph_node_list} + + def record_matched_nodes(self, match, matched_nodes): + matched_nodes.add(match.node.name) + for input_node in match.inputs: + self.record_matched_nodes(input_node, matched_nodes) + + def get_op_type_matches(self, pattern, matches): + matched_nodes = set() + for node in self.graph_node_list: + if node in matched_nodes: + continue + + match = NodeMatch() + if self.does_op_type_match(node, pattern, matched_nodes, match): + self.record_matched_nodes(match, matched_nodes) + matches.append(match) + + def does_op_type_match(self, node, pattern, previously_matched_nodes, match): + if node.name in previously_matched_nodes: + return False + + pattern_matched = False + if pattern.op == '*': + pattern_matched = True + else: + for pattern_op in pattern.op.split('|'): + if node.op_type == pattern_op: + pattern_matched = True + if not pattern_matched: + return False + + match.node = node + if not pattern.inputs: + return True + if len(node.input_nodes) != len(pattern.inputs): + return False + + for i in range(len(pattern.inputs)): + input_node = self._node_map[node.input_nodes[i].name] + input_pattern = pattern.inputs[i] + input_match = NodeMatch() + match.inputs.append(input_match) + + if not self.does_op_type_match(input_node, input_pattern, previously_matched_nodes, input_match): + return False + + return True diff --git a/dlk/python/dlk/core/operators.py b/dlk/python/dlk/core/operators.py index 4f47d6187..09c5d39d0 100644 --- a/dlk/python/dlk/core/operators.py +++ b/dlk/python/dlk/core/operators.py @@ -56,6 +56,8 @@ def __init__(self, self._check_consistency() self._rank = len(shape) self._available_buffer = '' + self._visited = False + self._prop_details = Dict def __update_shape(self, shape: List[int], dimension_format: str) -> None: self._shape: List[int] = shape @@ -170,6 +172,19 @@ def input_names(cls) -> List[str]: """ return cls._input_names + @property + def input_nodes(self) -> List['Operator']: + """Return a list of input operators in proper order (original protobuf argument order). 
+ + Returns + ------- + ops : List of operators + This list is already ordered following the order of the arguments in the original + protobuf operators (positional order in the list of arguments). + + """ + return [self._input_ops[i] for i in self.input_names if self.input_ops.get(i)] + @property def output_ops(self) -> OutOps: """Return a dict of output operators. @@ -545,6 +560,22 @@ def infer_shape(cls, lists: Dict[str, List[int]], format: str, input_formats: Li """ raise NotImplementedError(f'operator {cls.__name__} cannot infer its shape.') + @property + def visited(self) -> bool: + return self._visited + + @visited.setter + def visited(self, v: Bool) -> None: + self._visited = v + + @property + def run_it_will_lose_information(self) -> bool: + return False + + @property + def preserve_quantization(self) -> bool: + return False + class Variable(Operator): """Variable class, which must be Input, Output or a constant.""" @@ -694,6 +725,10 @@ def infer_shape(cls, lists: Dict[str, List[int]], format: str, input_formats: Li attrs: Dict[str, Any]) -> List[int]: return lists['input'] + @property + def preserve_quantization(self) -> bool: + return True + class Quantizer(Operator): """Base class for quantizers.""" @@ -740,6 +775,11 @@ def binarizer(self, data: np.ndarray) -> np.ndarray: raise NotImplementedError( f'operator {self.op_type} need to implement the binarizer method') + @property + def run_it_will_lose_information(self) -> bool: + return True + + class QTZ_binary_mean_scaling(Quantizer): """Quantization operator using binary scaling. @@ -853,6 +893,10 @@ def infer_shape(cls, lists: Dict[str, List[int]], format: str, input_formats: Li attrs: Dict[str, Any]) -> List[int]: return lists['input'] + @property + def preserve_quantization(self) -> bool: + return True + class Transpose(Operator): """Transpose operator. @@ -915,6 +959,10 @@ def infer_shape(cls, lists: Dict[str, List[int]], format: str, input_formats: Li perm = attrs['perm'] return [lists['data'][i] for i in perm] + @property + def preserve_quantization(self) -> bool: + return True + class Conv(Operator): """Convolution operator. @@ -1241,6 +1289,10 @@ def infer_shape(cls, lists: Dict[str, List[int]], format: str, input_formats: Li NCHW = [N, C, H, W] return [NCHW[i] for i in [format.index(s) for s in 'NCHW']] + @property + def preserve_quantization(self) -> bool: + return True + class BatchNormalization(Operator): """Batch normalization operator. @@ -1812,6 +1864,10 @@ def run_forward(self) -> np.ndarray: def is_monotonic(self) -> bool: return False + @property + def preserve_quantization(self) -> bool: + return True + class Softmax(Operator): r"""Softmax operator. @@ -1957,6 +2013,10 @@ def infer_shape(cls, lists: Dict[str, List[int]], format: str, input_formats: Li def is_monotonic(self) -> bool: return False + @property + def preserve_quantization(self) -> bool: + return True + class Dropout(Operator): """Dropout operator. @@ -2313,6 +2373,10 @@ def _dispatch_name(self) -> str: def is_monotonic(self) -> bool: return False + @property + def preserve_quantization(self) -> bool: + return True + class Maximum(Operator): """Maximum operator. @@ -2411,6 +2475,10 @@ def infer_shape(cls, lists: Dict[str, List[int]], format: str, input_formats: Li attrs: Dict[str, Any]) -> List[int]: return lists['input'] + @property + def preserve_quantization(self) -> bool: + return True + class Split(Operator): """Split operator. 
@@ -2484,3 +2552,7 @@ def infer_shape(cls, lists: Dict[str, List[int]], format: str, input_formats: Li out_shape[ch_idx] = int(in_shape[ch_idx] / split) return out_shape + + @property + def preserve_quantization(self) -> bool: + return True diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index 247bc1798..12040efda 100644 --- a/dlk/python/dlk/core/optimizer.py +++ b/dlk/python/dlk/core/optimizer.py @@ -21,7 +21,7 @@ from core.operators import Add, AveragePool, BatchNormalization, Constant, Conv, Identity, Input, \ MaxPool, Operator, Output, Transpose, Quantizer, QTZ_binary_mean_scaling, QTZ_linear_mid_tread_half, \ Reshape, Softmax, Relu, Flatten, Dropout, Gemm, SpaceToDepth, QTZ_binary_channel_wise_mean_scaling, ConcatOnDepth,\ - Maximum, DepthToSpace, Split + Maximum, DepthToSpace, Split, Variable from typing import Any, Dict, List, Optional, Set, cast from functools import reduce @@ -642,4 +642,3 @@ def threshold_skipping(self, graph: Graph) -> Graph: kwargs: Dict[str, Any] = {} runner1.run(**kwargs) return graph - diff --git a/dlk/python/dlk/plugins/tf.py b/dlk/python/dlk/plugins/tf.py index f25e59d31..584fc1e6f 100644 --- a/dlk/python/dlk/plugins/tf.py +++ b/dlk/python/dlk/plugins/tf.py @@ -422,12 +422,12 @@ def convert_operator(self, op_type: str) -> str: return dlk_op_type if dlk_op_type else op_type def create_new_op(self, node: Any, op_dic: Dict[str, Operator], current_format: str, - input_format_list: List[str]) -> Operator: + input_format_list: List[str], nodes_to_remove) -> Operator: """Create new operators with Node, Input(Constant), Output.""" new_op: Operator if isinstance(node, Node): # operator nodes - new_op = self.create_new_node(node, op_dic, current_format, input_format_list) + new_op = self.create_new_node(node, op_dic, current_format, input_format_list, nodes_to_remove) else: # Input, Output or Constant shape: List[int] = list(map(int, node.get_shape())) @@ -471,7 +471,10 @@ def create_new_op(self, node: Any, op_dic: Dict[str, Operator], current_format: def add_all_nodes(self, graph: Graph) -> None: visited: Set[Any] = set() added: Dict[str, Operator] = {} - self.add_node_to_graph_recursive(self.out_lst[0], graph, visited, added, 'NHWC') + nodes_to_remove = [] + self.add_node_to_graph_recursive(self.out_lst[0], graph, visited, added, 'NHWC', nodes_to_remove) + for node in nodes_to_remove: + graph.remove_op(node) def _get_format(self, node: Any, output_format: str) -> Tuple[str, List[str]]: """Get the dimension format, like 'NCHW', 'HWCN', 'NHWC', etc.""" @@ -533,7 +536,7 @@ def _get_format(self, node: Any, output_format: str) -> Tuple[str, List[str]]: return output_format, [output_format] def add_node_to_graph_recursive(self, current: Any, graph: Graph, visited: Set[Any], added: Dict[str, Operator], - data_format: str) \ + data_format: str, nodes_to_remove) \ -> Operator: if current in visited: return added[current.name] @@ -544,10 +547,10 @@ def add_node_to_graph_recursive(self, current: Any, graph: Graph, visited: Set[A current_format, input_formats = self._get_format(current, data_format) inputs = self.find_inputs(current) for in_put, in_format in zip(inputs, input_formats): - in_op = self.add_node_to_graph_recursive(in_put, graph, visited, added, in_format) + in_op = self.add_node_to_graph_recursive(in_put, graph, visited, added, in_format, nodes_to_remove) added_op_dic[in_op.name] = in_op - op = self.create_new_op(current, added_op_dic, current_format, input_formats) + op = self.create_new_op(current, added_op_dic, current_format, 
input_formats, nodes_to_remove) graph.add_op(op) @@ -577,7 +580,7 @@ def find_inputs(self, node: Any) -> List[Any]: return inputs def create_new_node(self, node: Node, op_dic: Dict[str, Operator], current_format: str, - input_format_list: List[str]) -> Operator: + input_format_list: List[str], nodes_to_remove) -> Operator: """Create a new operator node. This might be tooooo long code... Parameters @@ -617,11 +620,13 @@ def create_new_node(self, node: Node, op_dic: Dict[str, Operator], current_forma def get_inputs(cdef: Type[Operator], current_node: Any) -> Dict[str, Operator]: input_names = cdef.input_names in_ops: Dict[str, Operator] = {} + in_ops_order: List[int] = [] for n, op in zip(input_names, current_node.inputs): in_ops[n] = op_dic[op] - return in_ops + in_ops_order.append(n) + return in_ops, in_ops_order - input_ops = get_inputs(class_def, node) + input_ops, input_ops_order = get_inputs(class_def, node) # Here find the shape and data type for the op def infer_shape(attrs: Dict[str, Any]) -> List[int]: @@ -955,7 +960,7 @@ def infer_dtype() -> DataType: dimension_format=current_format, ) elif op_type == 'ConcatOnDepth': - axis = node.attribute('axis') + axis = input_ops[input_ops_order[-1]] if current_format.index('C') != axis: ValueError('f{op_type} {node.name} concatenation is only supported on the depth axis') @@ -970,6 +975,10 @@ def infer_dtype() -> DataType: input_ops, dimension_format=current_format, ) + + input_axis_name = input_ops_order[-1] + nodes_to_remove.append(new_op.input_ops[input_axis_name]) + new_op.remove_input(input_axis_name) elif op_type == 'Maximum': if not shape: diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index 95207e576..0b82bce8b 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -30,6 +30,8 @@ from core.optimizer import Optimizer from code_generater import CodeGenerater from frontend import TensorFlowIO +from core.graph_pattern_matching import GraphMatcher, Pattern +from core.operators import Constant import utils @@ -38,6 +40,214 @@ ROOT_DIR = path.abspath(path.join(SCRITPS_DIR, '../../..')) +def pass_print(graph: Graph, name=str()): + + gm = GraphMatcher(graph) + + print('--- ', name, '---') + matches = list() + p = Pattern("*") + gm.get_op_type_matches(p, matches) + + for m in matches: + print('Match: ', m.node.name, m.node.op_type, m.node.dimension) + for input_node in m.node.input_nodes: + print(' -> ', input_node.name, input_node.op_type) + + print('---') + + +def pass_dot_graph(graph: Graph, filename): + + dot_script = 'digraph {' + + code = {} + counter = 0 + for node in graph.operators: + code[node.name] = counter + counter += 1 + + for node in graph.operators: + for input_node in node.input_nodes: + quant = node.quantizer.name if node.op_type == 'Conv' and node.quantizer else 'None' + aquant = node.a_quantizer[0].name if node.op_type == 'Conv' and node.a_quantizer else 'None' + + dot_script += '"' + format(code[input_node.name], '04X') + '-' + input_node.op_type + '"' + ' -> ' \ + + '"' + format(code[node.name], '04X') + '-' + node.op_type + '-' + aquant + '/' + quant + '"' + ';' + + dot_script += '}' + + with open(filename, 'w') as f: + f.write(dot_script) + + +def pass_remove_identities(graph: Graph): + + gm = GraphMatcher(graph) + + to_be_removed = list() + matches = list() + p = Pattern("Identity") + gm.get_op_type_matches(p, matches) + + for m in matches: + # print('Match: ', m.node.name, m.node.op_type) + # for input_node in 
m.node.input_nodes: + # print(' -> ', input_node.name, input_node.op_type) + + """skip all identity.""" + in_op = m.node.input_ops['input'] + out_ops = m.node.output_ops['output'] + for out_op in out_ops: + for k, v in out_op.input_ops.items(): + if v == m.node: + # change the output's input to this identity's input + out_op.add_input(k, in_op) + # change the input's output to this identity's output + for k2, v2 in in_op.output_ops.items(): + if m.node in v2: + v2.remove(m.node) + v2.append(out_op) + break + break + + to_be_removed.append(m.node) + + for op in to_be_removed: + graph.remove_op(op) + + +def pass_transpose(graph): + + gm = GraphMatcher(graph) + + matches = list() + p = Pattern("*") + gm.get_op_type_matches(p, matches) + + for m in matches: + # print('Match: ', m.node.name, m.node.op_type) + # for input_node in m.node.input_nodes: + # print(' -> ', input_node.name, input_node.op_type) + + dim = m.node.dimension + shape = m.node.shape + if len(shape) != 4 or len(dim) != 4 or not set(dim).issubset({'N', 'H', 'W', 'C', 'I', 'O'}): + continue + + dim = dim.replace('I', 'C') + dim = dim.replace('O', 'N') + + permutation = list(map(lambda s: dim.index(s), 'NHWC')) + m.node.transpose(permutation) + + +def pass_precompute(graph) -> int: + + gm = GraphMatcher(graph) + + to_be_removed = list() + matches = list() + p = Pattern("*") + gm.get_op_type_matches(p, matches) + + for m in matches: + + # We want operators with inputs + if not m.node.input_nodes: + continue + + # Leave out nodes which execution will lose information. + # They will have a special processing later. + if m.node.run_it_will_lose_information: + continue + + precomputable = True + for input_node in m.node.input_nodes: + if input_node.op_type != 'Constant': + precomputable = False + + if not precomputable: + continue + + to_be_removed += m.node.input_nodes + to_be_removed.append(m.node) + + m.node.run_forward() + + new_constant = Constant( + m.node.name + '_new', + m.node.dtype, + m.node.data, + dimension_format=m.node.dimension + ) + + graph.add_op(new_constant) + + new_constant.add_outputs(m.node.output_ops) + for output_name, consumer_list in m.node.output_ops.items(): + for consumer_node in consumer_list: + for input_name, input_node in consumer_node.input_ops.items(): + if input_node == m.node: + consumer_node.add_input(input_name, new_constant) + break + + for op in to_be_removed: + graph.remove_op(op) + + return len(to_be_removed) + + +def pass_propagate_quantization_details_into_conv(graph): + + gm = GraphMatcher(graph) + + matches = list() + p = Pattern('*') + gm.get_op_type_matches(p, matches) + + quantization_types = [ + 'QTZ_binary_mean_scaling', + 'QTZ_linear_mid_tread_half', + 'QTZ_binary_channel_wise_mean_scaling' + ] + + quantization_details = {} + for m in matches: + if not m.node.preserve_quantization: + quantization_details[m.node.name] = None + continue + + current_node_quant_details = [] + for input_node in m.node.input_nodes: + if input_node.op_type in quantization_types: + current_node_quant_details.append(input_node) + else: + current_node_quant_details.append(quantization_details[input_node.name]) + + if m.node.op_type == 'Conv': + m.node.a_quantizer = [current_node_quant_details[0]] if current_node_quant_details[0] else [] + m.node.quantizer = current_node_quant_details[1] + quantization_details[m.node.name] = None + else: + all_quantizers = True + for quantizer in current_node_quant_details: + if not quantizer: + all_quantizers = False + break + + if not all_quantizers: + same_nbits = False + 
else: + same_nbits = all(quantizer.nbit == current_node_quant_details[0].nbit + for quantizer in current_node_quant_details) + + quantization_details[m.node.name] = current_node_quant_details[0] if same_nbits else None + + if not same_nbits: + print(f'Warning: Not every input node of {m.node.name} is quantized to the same bit-width') + + def optimize_graph_step(model: Model, config: Config) -> None: """Optimze graph in the model. @@ -51,6 +261,26 @@ def optimize_graph_step(model: Model, config: Config) -> None: """ graph: Graph = model.graph + + pass_print(graph, 'Before') + pass_dot_graph(graph, '/tmp/original.dot') + + pass_remove_identities(graph) + pass_print(graph, 'After identity') + pass_dot_graph(graph, '/tmp/prune_identities.dot') + + pass_transpose(graph) + pass_print(graph, 'After transpose') + pass_dot_graph(graph, '/tmp/transposed.dot') + + pass_precompute(graph) + pass_print(graph, 'After precompute') + + pass_propagate_quantization_details_into_conv(graph) + pass_print(graph, 'After propagate') + + pass_dot_graph(graph, '/tmp/final.dot') + optim = Optimizer() optim.transpose_NHWC(graph) optim.precompute(graph, config.activate_hard_quantization) diff --git a/dlk/python/dlk/templates/Makefile.tpl b/dlk/python/dlk/templates/Makefile.tpl index 14d5a2b8b..b8c417f0d 100644 --- a/dlk/python/dlk/templates/Makefile.tpl +++ b/dlk/python/dlk/templates/Makefile.tpl @@ -134,17 +134,17 @@ clean: -$(RM) $(OBJ) lm_x86: CXX = g++ -lm_x86: FLAGS += $(INCLUDES) -O3 -std=c++0x -g -DUSE_PNG -pthread -g +lm_x86: FLAGS += $(INCLUDES) -O3 -std=c++0x -g -DUSE_PNG -pthread -g -DFUNC_TIME_MEASUREMENT lm_aarch64: CXX = aarch64-linux-gnu-g++ -lm_aarch64: FLAGS += $(INCLUDES) -O3 -std=c++0x -g -DUSE_NEON -DUSE_PNG -pthread -g +lm_aarch64: FLAGS += $(INCLUDES) -O3 -std=c++0x -g -DUSE_NEON -DUSE_PNG -pthread -g -DFUNC_TIME_MEASUREMENT lm_arm: CXX = arm-linux-gnueabihf-g++ -lm_arm: FLAGS += $(INCLUDES) -std=c++0x -O3 -DUSE_NEON -DUSE_PNG -mcpu=cortex-a9 -mfpu=neon -mthumb -s -pthread -g -fopenmp +lm_arm: FLAGS += $(INCLUDES) -std=c++0x -O3 -DUSE_NEON -DUSE_PNG -mcpu=cortex-a9 -mfpu=neon -mthumb -s -pthread -g -fopenmp -DFUNC_TIME_MEASUREMENT lm_arm: CXXFLAGS += lm_fpga: CXX = arm-linux-gnueabihf-g++ -lm_fpga: FLAGS += $(INCLUDES) -std=c++0x -O3 -DUSE_NEON -DRUN_ON_FPGA -DUSE_PNG -mcpu=cortex-a9 -mfpu=neon -mthumb -s -pthread -g -fopenmp +lm_fpga: FLAGS += $(INCLUDES) -std=c++0x -O3 -DUSE_NEON -DRUN_ON_FPGA -DUSE_PNG -mcpu=cortex-a9 -mfpu=neon -mthumb -s -pthread -g -fopenmp -DFUNC_TIME_MEASUREMENT lm_fpga: CXXFLAGS += lib_x86: CXX = g++ From 7376a1245b904999d06edb2ed993c271e3f964d8 Mon Sep 17 00:00:00 2001 From: Antonio Date: Thu, 29 Nov 2018 15:16:17 +0900 Subject: [PATCH 02/45] Fix: not necessary, axis is attribute in DLK IR --- dlk/python/dlk/core/view.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dlk/python/dlk/core/view.py b/dlk/python/dlk/core/view.py index 38fb28319..d711576a0 100644 --- a/dlk/python/dlk/core/view.py +++ b/dlk/python/dlk/core/view.py @@ -655,7 +655,7 @@ def run(self): input_list_name = op.name + '_inputs' depth_list_name = op.name + '_inputs_depth' - number_of_inputs = len(input_ops) - 1 + number_of_inputs = len(input_ops) concat_input = {} for k, v in input_ops.items(): if not v.is_variable: From 8a269d5858a86081dfeac5be6de9216fe1174e5a Mon Sep 17 00:00:00 2001 From: Antonio Date: Wed, 5 Dec 2018 18:44:19 +0900 Subject: [PATCH 03/45] Added a pass for computing thresholds based on binary search --- dlk/python/dlk/core/operators.py | 3 + 
dlk/python/dlk/scripts/generate_project.py | 100 ++++++++++++++++++++- 2 files changed, 99 insertions(+), 4 deletions(-) diff --git a/dlk/python/dlk/core/operators.py b/dlk/python/dlk/core/operators.py index 09c5d39d0..9ceabb4b5 100644 --- a/dlk/python/dlk/core/operators.py +++ b/dlk/python/dlk/core/operators.py @@ -653,6 +653,9 @@ def __init__(self, self._packed = packed super().__init__(name, shape, dtype, {}, data, dimension_format=dimension_format) + def run_forward(self) -> np.ndarray: + return self._data + @property def is_packed(self) -> bool: return self._packed diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index 0b82bce8b..57bfb4df2 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -22,6 +22,7 @@ import click from os import path import shutil +import numpy as np from core.config import Config from core.graph import Graph @@ -30,9 +31,10 @@ from core.optimizer import Optimizer from code_generater import CodeGenerater from frontend import TensorFlowIO -from core.graph_pattern_matching import GraphMatcher, Pattern +from core.graph_pattern_matching import GraphMatcher, Pattern, match_to_execution_list from core.operators import Constant +from collections import defaultdict import utils SCRITPS_DIR = path.abspath(path.dirname(__file__)) @@ -69,11 +71,9 @@ def pass_dot_graph(graph: Graph, filename): for node in graph.operators: for input_node in node.input_nodes: - quant = node.quantizer.name if node.op_type == 'Conv' and node.quantizer else 'None' - aquant = node.a_quantizer[0].name if node.op_type == 'Conv' and node.a_quantizer else 'None' dot_script += '"' + format(code[input_node.name], '04X') + '-' + input_node.op_type + '"' + ' -> ' \ - + '"' + format(code[node.name], '04X') + '-' + node.op_type + '-' + aquant + '/' + quant + '"' + ';' + + '"' + format(code[node.name], '04X') + '-' + node.op_type + '"' + ';' dot_script += '}' @@ -248,6 +248,95 @@ def pass_propagate_quantization_details_into_conv(graph): print(f'Warning: Not every input node of {m.node.name} is quantized to the same bit-width') +def pass_compute_thresholds(graph): + + gm = GraphMatcher(graph) + + quantization_types_pattern = \ + 'QTZ_linear_mid_tread_half' + + matches = list() + p = Pattern(quantization_types_pattern, + [ + Pattern('BatchNormalization', + [ + Pattern('Conv'), + Pattern('*'), + Pattern('*'), + Pattern('*'), + Pattern('*') + ]), + Pattern('*'), + Pattern('*'), + ]) + + gm.get_op_type_matches(p, matches) + + for m in matches: + + quantizer_conv_output_node = m.node + batch_norm_node = quantizer_conv_output_node.input_nodes[0] + conv_node = batch_norm_node.input_nodes[0] + + # check if this is a quantized convolution + if not conv_node.quantizer or not conv_node.a_quantizer: + continue + + quantizer_conv_weights = conv_node.quantizer + quantizer_conv_weights.run_forward() + scaling_factor = quantizer_conv_weights.scaling_factor + + match_execution_list = list() + match_to_execution_list(m, match_execution_list) + + ths = defaultdict(list) + computed_quantized_results = defaultdict(set) + magic_number = 2 + + # TODO: make '3' function on the number of bits of the number of bits + for value in range(0, 3): + for idx in range(scaling_factor.size): + + # assume that the output value will be a 16-bit signed integer + low = -(2**15) + high = 2**15 - 1 + + # binary search + while low <= high: + mid = low + (high - low) // 2 + input_data = (scaling_factor * mid) * 2.0 / 3.0 # TODO: get from 
quantizers (n_bits, max_value) + data_dict = batch_norm_node.run(data=input_data) + data_dict = quantizer_conv_output_node.run(data=data_dict['data']) + result = data_dict['data'][idx] + computed_quantized_results[idx].add(result) + + if result > value: + high = mid - 1 + else: + low = mid + 1 + + ths[idx].append(low) + + # check if increasing, decreasing or constant + for channel, values in computed_quantized_results.items(): + if len(values) == 1: + ths[channel].append(values.pop() + magic_number) + else: + first_threshold_result = values.pop() + second_threshold_result = values.pop() + + if first_threshold_result < second_threshold_result: + ths[channel].append(1) + else: + ths[channel].append(-1) + + # put everything into a list to be compatible with the rest of the code + ths_list = [] + for channel in sorted(ths.keys()): + ths_list += ths[channel] + conv_node.thresholds = ths_list + + def optimize_graph_step(model: Model, config: Config) -> None: """Optimze graph in the model. @@ -273,12 +362,15 @@ def optimize_graph_step(model: Model, config: Config) -> None: pass_print(graph, 'After transpose') pass_dot_graph(graph, '/tmp/transposed.dot') + # TODO: call until pass_precompute returns 0 pass_precompute(graph) pass_print(graph, 'After precompute') pass_propagate_quantization_details_into_conv(graph) pass_print(graph, 'After propagate') + pass_compute_thresholds(graph) + pass_dot_graph(graph, '/tmp/final.dot') optim = Optimizer() From ebe9d3ad2d9b46b772a41fd0fd0cac8713974275 Mon Sep 17 00:00:00 2001 From: Antonio Date: Fri, 7 Dec 2018 14:28:34 +0900 Subject: [PATCH 04/45] [WIP] Visualization, weight packing and small fixes. Start to comment out old code --- dlk/python/dlk/core/operators.py | 7 - dlk/python/dlk/core/optimizer.py | 147 +++++++++++---------- dlk/python/dlk/scripts/generate_project.py | 140 +++++++++++++++----- 3 files changed, 178 insertions(+), 116 deletions(-) diff --git a/dlk/python/dlk/core/operators.py b/dlk/python/dlk/core/operators.py index 9ceabb4b5..680591650 100644 --- a/dlk/python/dlk/core/operators.py +++ b/dlk/python/dlk/core/operators.py @@ -568,10 +568,6 @@ def visited(self) -> bool: def visited(self, v: Bool) -> None: self._visited = v - @property - def run_it_will_lose_information(self) -> bool: - return False - @property def preserve_quantization(self) -> bool: return False @@ -778,9 +774,6 @@ def binarizer(self, data: np.ndarray) -> np.ndarray: raise NotImplementedError( f'operator {self.op_type} need to implement the binarizer method') - @property - def run_it_will_lose_information(self) -> bool: - return True diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index 12040efda..27e03939a 100644 --- a/dlk/python/dlk/core/optimizer.py +++ b/dlk/python/dlk/core/optimizer.py @@ -485,87 +485,88 @@ def _apply_threshold_skipping(self, op_lst: List[Operator]) -> None: if start is not None and finish is not None: - def linear_qtz2float(x: np.ndarray, n_value: int, max_value: float) -> np.ndarray: - real_x = x / np.float64(n_value) * np.float64(max_value) - return real_x.astype(np.float64) + # def linear_qtz2float(x: np.ndarray, n_value: int, max_value: float) -> np.ndarray: + # real_x = x / np.float64(n_value) * np.float64(max_value) + # return real_x.astype(np.float64) # Step 1: Compute thresholds for Convolution operators - aqtzer = cast(Quantizer, start.a_quantizer[0]) # Activation Quantizers should all have the same bits - bit = aqtzer.nbit - max_v = aqtzer.max_v - if bit is None or max_v is None: - ValueError(f'activation 
quantizer of node {start.name} has bit or max value of None') - - n = 2 ** bit - 1 - ch = start.channel - lch = start.input_ops['X'].channel - k = start.kernel_height * start.kernel_width * lch * n - qtzer = cast(Quantizer, start.quantizer) - conv_results = [x for x in range(-k, k + 1, 1)] - th_tmp = np.empty([ch, n + 1], dtype=np.int32) - v_now = dict.fromkeys([x for x in range(ch)], 0) - th_now = 0 - val_neg_flag = -1 - val_pos_flag = 1 - all_transdata: Dict[int, Dict[str, Any]] = {} + # aqtzer = cast(Quantizer, start.a_quantizer[0]) # Activation Quantizers should all have the same bits + # bit = aqtzer.nbit + # max_v = aqtzer.max_v + # if bit is None or max_v is None: + # ValueError(f'activation quantizer of node {start.name} has bit or max value of None') + + # n = 2 ** bit - 1 + # ch = start.channel + # lch = start.input_ops['X'].channel + # k = start.kernel_height * start.kernel_width * lch * n + # qtzer = cast(Quantizer, start.quantizer) + # conv_results = [x for x in range(-k, k + 1, 1)] + # th_tmp = np.empty([ch, n + 1], dtype=np.int32) + # v_now = dict.fromkeys([x for x in range(ch)], 0) + # th_now = 0 + # val_neg_flag = -1 + # val_pos_flag = 1 + # all_transdata: Dict[int, Dict[str, Any]] = {} # Step 1-1: initalize thresholds - for conv_res in conv_results: - conv_out = np.full(ch, conv_res, dtype=np.float64) - conv_out *= qtzer.scaling_factor if qtzer.scaling_factor is not None \ - else ValueError(f'oops Quantizer of node {start.name} has scaling factor of None') - - conv_data = linear_qtz2float(conv_out, n, max_v) - - trans_data: Dict[str, Any] = {'data': conv_data} - for idx, op in sorted(transitions.items(), reverse=True): - trans_data = op.run(**trans_data) - - for depth in range(ch): - init = -k if depth in trans_data['nega_idx'] else k - th_tmp[depth, :] = init - - all_transdata[conv_res] = trans_data + # for conv_res in conv_results: + # conv_out = np.full(ch, conv_res, dtype=np.float64) + # conv_out *= qtzer.scaling_factor if qtzer.scaling_factor is not None \ + # else ValueError(f'oops Quantizer of node {start.name} has scaling factor of None') + # + # conv_data = linear_qtz2float(conv_out, n, max_v) + # + # trans_data: Dict[str, Any] = {'data': conv_data} + # for idx, op in sorted(transitions.items(), reverse=True): + # trans_data = op.run(**trans_data) + # + # for depth in range(ch): + # init = -k if depth in trans_data['nega_idx'] else k + # th_tmp[depth, :] = init + # + # all_transdata[conv_res] = trans_data # Step 1-2: update thresholds - for conv_res in conv_results: - trans_data = all_transdata[conv_res] - qtz_out = trans_data['data'] - qtz_mu = np.mean(qtz_out) - if qtz_mu != th_now: - for depth in range(ch): - is_negative = depth in trans_data['nega_idx'] - if v_now.get(depth) != qtz_out[depth]: - if is_negative: - th_tmp[depth, abs(n - qtz_out[depth] - 1)] = conv_res - else: - th_tmp[depth, qtz_out[depth] - 1] = conv_res - v_now[depth] = qtz_out[depth] - th_tmp[depth, n] = -1 if is_negative else 1 - for depth in range(ch): - constant = reduce(lambda x, y: x and y, - [th_tmp[depth, i] == th_tmp[depth, i + 1] for i in range(n - 1)]) - th_tmp[depth, n] = qtz_out[depth] + 2 if constant else th_tmp[depth, n] - # note: 2 above is a magic number. the result value must not be 1 nor -1. 
- th_now = qtz_mu - - start.thresholds = th_tmp.flatten().tolist() + # for conv_res in conv_results: + # trans_data = all_transdata[conv_res] + # qtz_out = trans_data['data'] + # qtz_mu = np.mean(qtz_out) + # if qtz_mu != th_now: + # for depth in range(ch): + # is_negative = depth in trans_data['nega_idx'] + # if v_now.get(depth) != qtz_out[depth]: + # if is_negative: + # th_tmp[depth, abs(n - qtz_out[depth] - 1)] = conv_res + # else: + # th_tmp[depth, qtz_out[depth] - 1] = conv_res + # v_now[depth] = qtz_out[depth] + # th_tmp[depth, n] = -1 if is_negative else 1 + # for depth in range(ch): + # constant = reduce(lambda x, y: x and y, + # [th_tmp[depth, i] == th_tmp[depth, i + 1] for i in range(n - 1)]) + # th_tmp[depth, n] = qtz_out[depth] + 2 if constant else th_tmp[depth, n] + # # note: 2 above is a magic number. the result value must not be 1 nor -1. + # th_now = qtz_mu + + # start.thresholds = th_tmp.flatten().tolist() # Step 2: Skipping unused operators, e.g. batch normalization, linear activation quantizer - if start.has_thresholds: - if start.dtype is not finish.dtype: - start.dtype = finish.dtype - for consumers in finish.output_ops.values(): - for consumer in consumers: - for idex, y in start.output_ops.items(): - if not bool(set(consumers) & set(y)): - start.remove_output(idex) - start.add_output(idex, consumer) - - for indent, v in consumer.input_ops.items(): - if v == finish: - consumer.add_input(indent, start) - break + pass + # if start.has_thresholds: + # if start.dtype is not finish.dtype: + # start.dtype = finish.dtype + # for consumers in finish.output_ops.values(): + # for consumer in consumers: + # for idex, y in start.output_ops.items(): + # if not bool(set(consumers) & set(y)): + # start.remove_output(idex) + # start.add_output(idex, consumer) + # + # for indent, v in consumer.input_ops.items(): + # if v == finish: + # consumer.add_input(indent, start) + # break else: pass diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index 57bfb4df2..606871625 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -33,7 +33,9 @@ from frontend import TensorFlowIO from core.graph_pattern_matching import GraphMatcher, Pattern, match_to_execution_list from core.operators import Constant - +from modules.packer import Packer +from core.data_types import Uint32, QUANTIZED_NOT_PACKED +from typing import cast from collections import defaultdict import utils @@ -69,11 +71,22 @@ def pass_dot_graph(graph: Graph, filename): code[node.name] = counter counter += 1 + # for node in graph.operators: + # for input_node in node.input_nodes: + # + # dot_script += '"' + format(code[input_node.name], '04X') + '-' + input_node.op_type + '"' + ' -> ' \ + # + '"' + format(code[node.name], '04X') + '-' + node.op_type + '"' + ';' + for node in graph.operators: - for input_node in node.input_nodes: - dot_script += '"' + format(code[input_node.name], '04X') + '-' + input_node.op_type + '"' + ' -> ' \ - + '"' + format(code[node.name], '04X') + '-' + node.op_type + '"' + ';' + shape = '-' + if node.shape: + shape = 'x'.join(str(x) for x in node.shape) + shape += '(' + node.dimension + ')' + + dot_script += node.name + '[label=" ' + format(code[node.name], '04X') + '| ' + node.op_type + '| ' + shape + '| ' + node.dtype.cpptype() + '" shape = "record"];' + for i in node.input_nodes: + dot_script += i.name + ' -> ' + node.name + ';' dot_script += '}' @@ -91,10 +104,6 @@ def pass_remove_identities(graph: Graph): 
gm.get_op_type_matches(p, matches) for m in matches: - # print('Match: ', m.node.name, m.node.op_type) - # for input_node in m.node.input_nodes: - # print(' -> ', input_node.name, input_node.op_type) - """skip all identity.""" in_op = m.node.input_ops['input'] out_ops = m.node.output_ops['output'] @@ -126,10 +135,6 @@ def pass_transpose(graph): gm.get_op_type_matches(p, matches) for m in matches: - # print('Match: ', m.node.name, m.node.op_type) - # for input_node in m.node.input_nodes: - # print(' -> ', input_node.name, input_node.op_type) - dim = m.node.dimension shape = m.node.shape if len(shape) != 4 or len(dim) != 4 or not set(dim).issubset({'N', 'H', 'W', 'C', 'I', 'O'}): @@ -142,26 +147,24 @@ def pass_transpose(graph): m.node.transpose(permutation) -def pass_precompute(graph) -> int: +def pass_precompute(graph, processed_nodes): gm = GraphMatcher(graph) - to_be_removed = list() matches = list() - p = Pattern("*") + p = Pattern('*') gm.get_op_type_matches(p, matches) + processed_before_precompute = len(processed_nodes) + for m in matches: + if m.node in processed_nodes: + continue # We want operators with inputs if not m.node.input_nodes: continue - # Leave out nodes which execution will lose information. - # They will have a special processing later. - if m.node.run_it_will_lose_information: - continue - precomputable = True for input_node in m.node.input_nodes: if input_node.op_type != 'Constant': @@ -170,8 +173,8 @@ def pass_precompute(graph) -> int: if not precomputable: continue - to_be_removed += m.node.input_nodes - to_be_removed.append(m.node) + processed_nodes += m.node.input_nodes + processed_nodes.append(m.node) m.node.run_forward() @@ -192,10 +195,7 @@ def pass_precompute(graph) -> int: consumer_node.add_input(input_name, new_constant) break - for op in to_be_removed: - graph.remove_op(op) - - return len(to_be_removed) + return len(processed_nodes) > processed_before_precompute def pass_propagate_quantization_details_into_conv(graph): @@ -286,9 +286,6 @@ def pass_compute_thresholds(graph): quantizer_conv_weights.run_forward() scaling_factor = quantizer_conv_weights.scaling_factor - match_execution_list = list() - match_to_execution_list(m, match_execution_list) - ths = defaultdict(list) computed_quantized_results = defaultdict(set) magic_number = 2 @@ -298,8 +295,9 @@ def pass_compute_thresholds(graph): for idx in range(scaling_factor.size): # assume that the output value will be a 16-bit signed integer - low = -(2**15) - high = 2**15 - 1 + n = 2**15 + low = -n + 1 + high = n - 2 # binary search while low <= high: @@ -336,6 +334,72 @@ def pass_compute_thresholds(graph): ths_list += ths[channel] conv_node.thresholds = ths_list + # Disconnect batchnorm and the quantizer + out_ops = quantizer_conv_output_node.output_ops['output'] + for output_node in out_ops: + for input_name, input_node in output_node.input_ops.items(): + if input_node == quantizer_conv_output_node: + output_node.add_input(input_name, conv_node) + + conv_node.remove_output('Y') + conv_node.add_outputs({'Y': out_ops}) + + # TODO: temporary (only for drawing better graphs) + batch_norm_node.remove_input('X') + + +def pass_pack_weights(graph): + + gm = GraphMatcher(graph) + + quantization_types = [ + 'QTZ_binary_mean_scaling', + 'QTZ_linear_mid_tread_half', + 'QTZ_binary_channel_wise_mean_scaling' + ] + + matches = list() + p = Pattern('Conv') + + gm.get_op_type_matches(p, matches) + + # TODO: pass proper parameters + packer = Packer(1, 32) + + for m in matches: + conv_node = m.node + + # check if this is a 
quantized convolution + if not conv_node.quantizer or not conv_node.a_quantizer: + continue + + weight_quantizer = conv_node.quantizer + if weight_quantizer.op_type not in quantization_types: + continue + + # Quantize the weights + weight_quantizer.run_forward() + op_data = weight_quantizer.binarizer(weight_quantizer.data) + data = packer.run(op_data.astype(np.float32), weight_quantizer.dimension) + + quantized_constant = Constant( + weight_quantizer.name + '_new', + Uint32(), + data, + packed=True, + actual_shape=weight_quantizer.shape + ) + + graph.add_op(quantized_constant) + + quantized_constant.add_outputs(weight_quantizer.output_ops) + for output_name, consumer_list in weight_quantizer.output_ops.items(): + for consumer_node in consumer_list: + for input_name, input_node in consumer_node.input_ops.items(): + if input_node == weight_quantizer: + consumer_node.add_input(input_name, quantized_constant) + break + def optimize_graph_step(model: Model, config: Config) -> None: """Optimze graph in the model. @@ -362,24 +426,28 @@ def optimize_graph_step(model: Model, config: Config) -> None: pass_print(graph, 'After transpose') pass_dot_graph(graph, '/tmp/transposed.dot') - # TODO: call until pass_precompute returns 0 - pass_precompute(graph) - pass_print(graph, 'After precompute') - pass_propagate_quantization_details_into_conv(graph) pass_print(graph, 'After propagate') pass_compute_thresholds(graph) + pass_pack_weights(graph) + + # processed_nodes = [] + # while pass_precompute(graph, processed_nodes=processed_nodes): + # pass + # pass_print(graph, 'After precompute') pass_dot_graph(graph, '/tmp/final.dot') optim = Optimizer() - optim.transpose_NHWC(graph) + # optim.transpose_NHWC(graph) optim.precompute(graph, config.activate_hard_quantization) if config.threshold_skipping: optim.threshold_skipping(graph) + + def generate_code_step(model: Model, config: Config) -> None: """Generate code for the model. From 180d156581dd40c15efbc1690421b57f6bfac7bf Mon Sep 17 00:00:00 2001 From: Antonio Date: Sat, 8 Dec 2018 12:33:15 +0900 Subject: [PATCH 05/45] [WIP] Add extra passes and small bugfixes. 
Now doesnt depend on old optimizer code --- dlk/python/dlk/core/operators.py | 19 ++++ dlk/python/dlk/scripts/generate_project.py | 114 +++++++++++++-------- 2 files changed, 92 insertions(+), 41 deletions(-) diff --git a/dlk/python/dlk/core/operators.py b/dlk/python/dlk/core/operators.py index 680591650..369860de2 100644 --- a/dlk/python/dlk/core/operators.py +++ b/dlk/python/dlk/core/operators.py @@ -822,6 +822,14 @@ def run_forward(self) -> np.ndarray: in_data = self.input_ops['input'].data self._scaling_factor = np.mean(np.abs(in_data)) self._data = np.sign(in_data) + + return self._data * self._scaling_factor + + def run_forward_no_scaling_factor(self) -> np.ndarray: + in_data = self.input_ops['input'].data + self._scaling_factor = np.mean(np.abs(in_data)) + self._data = np.sign(in_data) + return self._data @classmethod @@ -2306,6 +2314,17 @@ def run_forward(self) -> np.ndarray: in_data = self.input_ops['input'].data self._scaling_factor = np.mean(np.abs(in_data), axis=(1, 2, 3)).astype(np.float32) self._data = np.sign(in_data) + + scaling = copy.deepcopy(self._scaling_factor) + extra_dims = tuple(np.ones((len(self._data.shape) - len(scaling.shape)), dtype=np.int32)) + scaling = scaling.reshape(scaling.shape + extra_dims) + + return scaling * self._data + + def run_forward_no_scaling_factor(self) -> np.ndarray: + in_data = self.input_ops['input'].data + self._scaling_factor = np.mean(np.abs(in_data), axis=(1, 2, 3)).astype(np.float32) + self._data = np.sign(in_data) return self._data def binarizer(self, data: np.ndarray) -> np.ndarray: diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index 606871625..960824834 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -176,12 +176,12 @@ def pass_precompute(graph, processed_nodes): processed_nodes += m.node.input_nodes processed_nodes.append(m.node) - m.node.run_forward() + data = m.node.run_forward() new_constant = Constant( m.node.name + '_new', m.node.dtype, - m.node.data, + data, dimension_format=m.node.dimension ) @@ -206,46 +206,37 @@ def pass_propagate_quantization_details_into_conv(graph): p = Pattern('*') gm.get_op_type_matches(p, matches) - quantization_types = [ + qtypes = [ 'QTZ_binary_mean_scaling', 'QTZ_linear_mid_tread_half', 'QTZ_binary_channel_wise_mean_scaling' ] - quantization_details = {} + quant_details = defaultdict(list) for m in matches: if not m.node.preserve_quantization: - quantization_details[m.node.name] = None + quant_details[m.node.name] = [] continue - current_node_quant_details = [] - for input_node in m.node.input_nodes: - if input_node.op_type in quantization_types: - current_node_quant_details.append(input_node) - else: - current_node_quant_details.append(quantization_details[input_node.name]) - if m.node.op_type == 'Conv': - m.node.a_quantizer = [current_node_quant_details[0]] if current_node_quant_details[0] else [] - m.node.quantizer = current_node_quant_details[1] - quantization_details[m.node.name] = None - else: - all_quantizers = True - for quantizer in current_node_quant_details: - if not quantizer: - all_quantizers = False - break + input_node = m.node.input_nodes[0] + weight_node = m.node.input_nodes[1] - if not all_quantizers: - same_nbits = False - else: - same_nbits = all(quantizer.nbit == current_node_quant_details[0].nbit - for quantizer in current_node_quant_details) + m.node.a_quantizer = [input_node] if input_node.op_type in qtypes else quant_details[input_node.name] + 
m.node.quantizer = weight_node if weight_node.op_type in qtypes else quant_details[weight_node.name] - quantization_details[m.node.name] = current_node_quant_details[0] if same_nbits else None + quant_details[m.node.name] = [] + else: + qtzs = [] + for n in m.node.input_nodes: + if n.op_type in qtypes: + qtzs.append(n) + else: + for q in quant_details[n.name]: + qtzs.append(q) - if not same_nbits: - print(f'Warning: Not every input node of {m.node.name} is quantized to the same bit-width') + quant_details[m.node.name] = qtzs if len(qtzs) == len(m.node.input_nodes) else [] + # TODO: check if the quantizers use same n_bits def pass_compute_thresholds(graph): @@ -283,7 +274,7 @@ def pass_compute_thresholds(graph): continue quantizer_conv_weights = conv_node.quantizer - quantizer_conv_weights.run_forward() + quantizer_conv_weights.run_forward_no_scaling_factor() scaling_factor = quantizer_conv_weights.scaling_factor ths = defaultdict(list) @@ -292,7 +283,7 @@ def pass_compute_thresholds(graph): # TODO: make '3' function on the number of bits of the number of bits for value in range(0, 3): - for idx in range(scaling_factor.size): + for idx in range(conv_node.channel): # assume that the output value will be a 16-bit signed integer n = 2**15 @@ -345,7 +336,7 @@ def pass_compute_thresholds(graph): conv_node.add_outputs({'Y': out_ops}) # TODO: temporary (only for drawing better graphs) - batch_norm_node.remove_input('X') + # batch_norm_node.remove_input('X') def pass_pack_weights(graph): @@ -401,6 +392,47 @@ def pass_pack_weights(graph): break +def pass_quantize_convolutions(graph): + + gm = GraphMatcher(graph) + + matches = list() + p = Pattern('Conv') + gm.get_op_type_matches(p, matches) + + for m in matches: + conv_node = m.node + + # check if this is a quantized convolution + if not conv_node.quantizer or not conv_node.a_quantizer: + continue + + # Mark as quantized convolution + conv_node.is_quantized = True + + # change the output data type of the convolution if thresholds are available + if conv_node.has_thresholds: + conv_node.dtype = QUANTIZED_NOT_PACKED + + # change the output data type of the quantizers + conv_node.quantizer.dtype = Uint32 + for qtz in conv_node.a_quantizer: + qtz.dtype = QUANTIZED_NOT_PACKED + + +def pass_propagate_datatypes(graph): + + gm = GraphMatcher(graph) + + matches = list() + p = Pattern('*') + gm.get_op_type_matches(p, matches) + + for m in matches: + if m.node.op_type != 'Conv' and m.node.preserve_quantization: + m.node.dtype = m.node.input_nodes[0].dtype + + def optimize_graph_step(model: Model, config: Config) -> None: """Optimze graph in the model. 
@@ -431,21 +463,21 @@ def optimize_graph_step(model: Model, config: Config) -> None: pass_compute_thresholds(graph) pass_pack_weights(graph) + pass_quantize_convolutions(graph) + pass_propagate_datatypes(graph) - # processed_nodes = [] - # while pass_precompute(graph, processed_nodes=processed_nodes): - # pass - # pass_print(graph, 'After precompute') + processed_nodes = [] + while pass_precompute(graph, processed_nodes=processed_nodes): + pass + pass_print(graph, 'After precompute') pass_dot_graph(graph, '/tmp/final.dot') optim = Optimizer() # optim.transpose_NHWC(graph) - optim.precompute(graph, config.activate_hard_quantization) - if config.threshold_skipping: - optim.threshold_skipping(graph) - - + # optim.precompute(graph, config.activate_hard_quantization) + # if config.threshold_skipping: + # optim.threshold_skipping(graph) def generate_code_step(model: Model, config: Config) -> None: From ba900c588ac71d415d0d4925ec1bfd40376e171a Mon Sep 17 00:00:00 2001 From: Antonio Date: Mon, 10 Dec 2018 12:17:39 +0900 Subject: [PATCH 06/45] Added base code so Neil-san can work on threshold skipping --- dlk/python/dlk/scripts/generate_project.py | 23 +++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index 960824834..7ff0401cd 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -32,7 +32,7 @@ from code_generater import CodeGenerater from frontend import TensorFlowIO from core.graph_pattern_matching import GraphMatcher, Pattern, match_to_execution_list -from core.operators import Constant +from core.operators import Constant, Operator from modules.packer import Packer from core.data_types import Uint32, QUANTIZED_NOT_PACKED from typing import cast @@ -265,10 +265,28 @@ def pass_compute_thresholds(graph): for m in matches: + # TODO: Neil-san, please use this to apply your threshold. 
'p' is the path from qtz to conv (both included) + # TODO: Neil-san, you can access to the quantizers thorugh 'conv.a_quantizer' and 'conv.quantizer' + # p = [m.node] + # while p[-1].op_type != 'Conv': + # non_variable_input = [inode for inode in p[-1].input_nodes + # if (not cast(Operator, inode).is_variable and inode.is_monotonic) + # or inode.op_type == 'Conv'] + # if len(non_variable_input) != 1: + # break + # p.append(non_variable_input[-1]) + # + # if p[-1].op_type != 'Conv': + # continue + # quantizer_conv_output_node = p[0] + # conv_node = p[-1] + + # TODO: Neil-san, you can delete this quantizer_conv_output_node = m.node batch_norm_node = quantizer_conv_output_node.input_nodes[0] conv_node = batch_norm_node.input_nodes[0] + # TODO: Neil-san, you should keep this # check if this is a quantized convolution if not conv_node.quantizer or not conv_node.a_quantizer: continue @@ -306,6 +324,7 @@ def pass_compute_thresholds(graph): ths[idx].append(low) + # TODO: Neil-san, you don't probably need this # check if increasing, decreasing or constant for channel, values in computed_quantized_results.items(): if len(values) == 1: @@ -319,12 +338,14 @@ def pass_compute_thresholds(graph): else: ths[channel].append(-1) + # TODO: Neil-san, you keep the things in a list already # put everything into a list to be compatible with the rest of the code ths_list = [] for channel in sorted(ths.keys()): ths_list += ths[channel] conv_node.thresholds = ths_list + # TODO: Neil-san, you should keep this # Disconnect batchnorm and the quantizer out_ops = quantizer_conv_output_node.output_ops['output'] for output_node in out_ops: From 58cbe844cb25042770df24c98cb8036f777e7cab Mon Sep 17 00:00:00 2001 From: nlpng Date: Mon, 10 Dec 2018 16:13:59 +0900 Subject: [PATCH 07/45] changes to the threshold skipping pass --- dlk/python/dlk/core/operators.py | 20 +++ dlk/python/dlk/scripts/generate_project.py | 134 ++++++++------------- 2 files changed, 73 insertions(+), 81 deletions(-) diff --git a/dlk/python/dlk/core/operators.py b/dlk/python/dlk/core/operators.py index 369860de2..082ea8a16 100644 --- a/dlk/python/dlk/core/operators.py +++ b/dlk/python/dlk/core/operators.py @@ -1377,6 +1377,17 @@ def run(self, **kwargs) -> Dict: kwargs['data'] = scale * x_norm + beta return kwargs + def de_run(self, **kwargs) -> Dict: + scale = np.float64(self._input_ops['scale'].data) + beta = np.float64(self._input_ops['B'].data) + mean = np.float64(self._input_ops['mean'].data) + var = np.float64(self._input_ops['var'].data) + + kwargs['nega_idx'] = [v for v in range(len(scale)) if scale[v] < 0] + + kwargs['data'] = (((kwargs['data'] - beta) / scale) * np.sqrt(var + self.epsilon)) + mean + return kwargs + def run_forward(self) -> np.ndarray: kwdata = {'data': self.input_ops['X'].data} data_dict = self.run(**kwdata) @@ -1445,6 +1456,15 @@ def run(self, **kwargs) -> Dict: kwargs['data'] = np.round(in_data * n / max_value).astype(np.int32) return kwargs + def de_run(self, **kwargs) -> Dict: + bit = self._input_ops['Y'].data + max_value = np.float64(self._input_ops['Z'].data) + in_data = np.float64(kwargs['data']) + + n = 2 ** bit - 1 + kwargs['data'] = (in_data * np.float64(max_value)) / np.float64(n) + return kwargs + def run_forward(self) -> np.ndarray: data_dict = self.run(data=self._input_ops['X'].data) self._data = data_dict['data'] diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index 7ff0401cd..e9f05d378 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ 
b/dlk/python/dlk/scripts/generate_project.py @@ -22,6 +22,7 @@ import click from os import path import shutil +import math import numpy as np from core.config import Config @@ -243,23 +244,8 @@ def pass_compute_thresholds(graph): gm = GraphMatcher(graph) - quantization_types_pattern = \ - 'QTZ_linear_mid_tread_half' - matches = list() - p = Pattern(quantization_types_pattern, - [ - Pattern('BatchNormalization', - [ - Pattern('Conv'), - Pattern('*'), - Pattern('*'), - Pattern('*'), - Pattern('*') - ]), - Pattern('*'), - Pattern('*'), - ]) + p = Pattern('QTZ_linear_mid_tread_half') gm.get_op_type_matches(p, matches) @@ -267,24 +253,19 @@ def pass_compute_thresholds(graph): # TODO: Neil-san, please use this to apply your threshold. 'p' is the path from qtz to conv (both included) # TODO: Neil-san, you can access to the quantizers thorugh 'conv.a_quantizer' and 'conv.quantizer' - # p = [m.node] - # while p[-1].op_type != 'Conv': - # non_variable_input = [inode for inode in p[-1].input_nodes - # if (not cast(Operator, inode).is_variable and inode.is_monotonic) - # or inode.op_type == 'Conv'] - # if len(non_variable_input) != 1: - # break - # p.append(non_variable_input[-1]) - # - # if p[-1].op_type != 'Conv': - # continue - # quantizer_conv_output_node = p[0] - # conv_node = p[-1] - - # TODO: Neil-san, you can delete this - quantizer_conv_output_node = m.node - batch_norm_node = quantizer_conv_output_node.input_nodes[0] - conv_node = batch_norm_node.input_nodes[0] + p = [m.node] + while p[-1].op_type != 'Conv': + non_variable_input = [inode for inode in p[-1].input_nodes + if (not cast(Operator, inode).is_variable and inode.is_monotonic) + or inode.op_type == 'Conv'] + if len(non_variable_input) != 1: + break + p.append(non_variable_input[-1]) + + if p[-1].op_type != 'Conv': + continue + quantizer_conv_output_node = p[0] + conv_node = p[-1] # TODO: Neil-san, you should keep this # check if this is a quantized convolution @@ -295,55 +276,46 @@ def pass_compute_thresholds(graph): quantizer_conv_weights.run_forward_no_scaling_factor() scaling_factor = quantizer_conv_weights.scaling_factor - ths = defaultdict(list) - computed_quantized_results = defaultdict(set) - magic_number = 2 - # TODO: make '3' function on the number of bits of the number of bits - for value in range(0, 3): - for idx in range(conv_node.channel): - - # assume that the output value will be a 16-bit signed integer - n = 2**15 - low = -n + 1 - high = n - 2 - - # binary search - while low <= high: - mid = low + (high - low) // 2 - input_data = (scaling_factor * mid) * 2.0 / 3.0 # TODO: get from quantizers (n_bits, max_value) - data_dict = batch_norm_node.run(data=input_data) - data_dict = quantizer_conv_output_node.run(data=data_dict['data']) - result = data_dict['data'][idx] - computed_quantized_results[idx].add(result) - - if result > value: - high = mid - 1 - else: - low = mid + 1 - - ths[idx].append(low) - - # TODO: Neil-san, you don't probably need this - # check if increasing, decreasing or constant - for channel, values in computed_quantized_results.items(): - if len(values) == 1: - ths[channel].append(values.pop() + magic_number) - else: - first_threshold_result = values.pop() - second_threshold_result = values.pop() - - if first_threshold_result < second_threshold_result: - ths[channel].append(1) + # assume that the output value will be a 16-bit signed integer + n = 2 ** 2 - 1 + ch = conv_node.channel + max_th_value = 2 ** 15 + + # The threshold_table is ndarray that holds the threshold values for all channels + 
threshold_table = np.empty([ch, n + 1], dtype=np.int32) + + # Compute threshold (t0, t1, t2) + for th_id, th_v in enumerate([0.5, 1.5, 2.5]): + init_threshold = np.full(ch, th_v, dtype=np.float64) + + # run calculation in reverse order: q -> bn -> scaling + # TODO: make sure the order of pattern is always valid + trans_th = {'data': init_threshold} + for op in p[:-1]: + trans_th = op.de_run(**trans_th) + threshold = (trans_th['data'] * np.float64(n)) / (np.float64(2.0) * scaling_factor) + + for ch_id, th_per_ch in enumerate(threshold): + if quantizer_conv_weights.op_type == 'QTZ_binary_channel_wise_mean_scaling': + threshold_table[ch_id, th_id] = int(math.floor(th_per_ch)) \ + if (scaling_factor[ch_id] < 0) ^ (ch_id in trans_th['nega_idx']) \ + else int(math.ceil(th_per_ch)) else: - ths[channel].append(-1) - - # TODO: Neil-san, you keep the things in a list already - # put everything into a list to be compatible with the rest of the code - ths_list = [] - for channel in sorted(ths.keys()): - ths_list += ths[channel] - conv_node.thresholds = ths_list + threshold_table[ch_id, th_id] = int(math.floor(th_per_ch)) \ + if (scaling_factor < 0) ^ (ch_id in trans_th['nega_idx']) \ + else int(math.ceil(th_per_ch)) + + # take care of threshold values that are larger than 16-bit signed integer + if abs(threshold_table[ch_id, th_id]) > max_th_value: + raise ValueError(f'the threshold value {th_id} is larger than 16-bit signed integer') + + for c in range(ch): + threshold_table[c, -1] = 1 \ + if np.all(threshold_table[c, 1:-1] > threshold_table[c, :-2], axis=0) else -1 + + # Put the thresholds into list + conv_node.thresholds = threshold_table.flatten().tolist() # TODO: Neil-san, you should keep this # Disconnect batchnorm and the quantizer From 299fe786a1f9d25a70bd625e06ea2335d12d21ef Mon Sep 17 00:00:00 2001 From: nlpng Date: Mon, 10 Dec 2018 18:38:25 +0900 Subject: [PATCH 08/45] Temperately fix for super large threshold value over 16bit int --- dlk/python/dlk/scripts/generate_project.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index e9f05d378..8643ddf1e 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -280,7 +280,7 @@ def pass_compute_thresholds(graph): # assume that the output value will be a 16-bit signed integer n = 2 ** 2 - 1 ch = conv_node.channel - max_th_value = 2 ** 15 + max_th_value = 2 ** 15 - 1 # The threshold_table is ndarray that holds the threshold values for all channels threshold_table = np.empty([ch, n + 1], dtype=np.int32) @@ -306,13 +306,14 @@ def pass_compute_thresholds(graph): if (scaling_factor < 0) ^ (ch_id in trans_th['nega_idx']) \ else int(math.ceil(th_per_ch)) - # take care of threshold values that are larger than 16-bit signed integer - if abs(threshold_table[ch_id, th_id]) > max_th_value: - raise ValueError(f'the threshold value {th_id} is larger than 16-bit signed integer') + # take care of threshold values that are larger than 16-bit signed integer + threshold_table[abs(threshold_table) > max_th_value] = max_th_value for c in range(ch): threshold_table[c, -1] = 1 \ if np.all(threshold_table[c, 1:-1] > threshold_table[c, :-2], axis=0) else -1 + if np.all(threshold_table[c, 1:-1] == threshold_table[c, :-2], axis=0): + threshold_table[c, -1] = 2 # Put the thresholds into list conv_node.thresholds = threshold_table.flatten().tolist() From 7052906f3f2e2edb5fe808d1108f0add228f5121 Mon Sep 17 
00:00:00 2001 From: nlpng Date: Tue, 11 Dec 2018 10:59:52 +0900 Subject: [PATCH 09/45] Applying conditions for hq and ts --- dlk/python/dlk/scripts/generate_project.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index 8643ddf1e..09b7f3e0f 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -312,6 +312,7 @@ def pass_compute_thresholds(graph): for c in range(ch): threshold_table[c, -1] = 1 \ if np.all(threshold_table[c, 1:-1] > threshold_table[c, :-2], axis=0) else -1 + # Applying the magic number if np.all(threshold_table[c, 1:-1] == threshold_table[c, :-2], axis=0): threshold_table[c, -1] = 2 @@ -452,12 +453,15 @@ def optimize_graph_step(model: Model, config: Config) -> None: pass_print(graph, 'After transpose') pass_dot_graph(graph, '/tmp/transposed.dot') - pass_propagate_quantization_details_into_conv(graph) - pass_print(graph, 'After propagate') - - pass_compute_thresholds(graph) - pass_pack_weights(graph) - pass_quantize_convolutions(graph) + if config.activate_hard_quantization: + pass_propagate_quantization_details_into_conv(graph) + pass_print(graph, 'After propagate') + + if config.threshold_skipping: + pass_compute_thresholds(graph) + pass_pack_weights(graph) + pass_quantize_convolutions(graph) + pass_propagate_datatypes(graph) processed_nodes = [] From a669ca592f47a21bf6b6f04bbfc7bd5ed2640a39 Mon Sep 17 00:00:00 2001 From: nlpng Date: Tue, 11 Dec 2018 13:19:59 +0900 Subject: [PATCH 10/45] Move the axis input of split operator to attribute --- dlk/python/dlk/core/view.py | 2 +- dlk/python/dlk/plugins/tf.py | 3 +++ dlk/python/dlk/scripts/generate_project.py | 7 +++---- dlk/python/dlk/templates/include/func/split.h | 2 +- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/dlk/python/dlk/core/view.py b/dlk/python/dlk/core/view.py index d711576a0..668725de9 100644 --- a/dlk/python/dlk/core/view.py +++ b/dlk/python/dlk/core/view.py @@ -706,7 +706,7 @@ def run(self): """ ) elif self.op.op_type == 'Split': - if len(input_ops) != 2: + if len(input_ops) != 1: self.raise_invalid_args_exception(op, input_ops, output_ops) inputs_string = self.inputs_to_string(input_ops) diff --git a/dlk/python/dlk/plugins/tf.py b/dlk/python/dlk/plugins/tf.py index 584fc1e6f..c652698ce 100644 --- a/dlk/python/dlk/plugins/tf.py +++ b/dlk/python/dlk/plugins/tf.py @@ -1027,6 +1027,9 @@ def infer_dtype() -> DataType: dimension_format=current_format, split=num_split ) + input_axis_name = input_ops_order[0] + nodes_to_remove.append(new_op.input_ops[input_axis_name]) + new_op.remove_input(input_axis_name) else: raise UnsupportedNode( f'TensorFlow importer cannot convert {op_type} operator node!') diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index 09b7f3e0f..328c00653 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -277,10 +277,10 @@ def pass_compute_thresholds(graph): scaling_factor = quantizer_conv_weights.scaling_factor # TODO: make '3' function on the number of bits of the number of bits - # assume that the output value will be a 16-bit signed integer n = 2 ** 2 - 1 ch = conv_node.channel - max_th_value = 2 ** 15 - 1 + # assume that the threshold values will be a 13-bit signed integer + max_th_value = 2 ** 12 - 1 # The threshold_table is ndarray that holds the threshold values for all channels threshold_table 
= np.empty([ch, n + 1], dtype=np.int32) @@ -290,7 +290,6 @@ def pass_compute_thresholds(graph): init_threshold = np.full(ch, th_v, dtype=np.float64) # run calculation in reverse order: q -> bn -> scaling - # TODO: make sure the order of pattern is always valid trans_th = {'data': init_threshold} for op in p[:-1]: trans_th = op.de_run(**trans_th) @@ -461,7 +460,7 @@ def optimize_graph_step(model: Model, config: Config) -> None: pass_compute_thresholds(graph) pass_pack_weights(graph) pass_quantize_convolutions(graph) - + pass_propagate_datatypes(graph) processed_nodes = [] diff --git a/dlk/python/dlk/templates/include/func/split.h b/dlk/python/dlk/templates/include/func/split.h index 8960025b0..524a88b17 100644 --- a/dlk/python/dlk/templates/include/func/split.h +++ b/dlk/python/dlk/templates/include/func/split.h @@ -20,7 +20,7 @@ limitations under the License. #include "time_measurement.h" template -void func_Split(int32_t axis, T input[], T *outputs[], T_UINT num_split, T_UINT out_height, T_UINT out_width, T_UINT out_depth) +void func_Split(T input[], T *outputs[], T_UINT num_split, T_UINT out_height, T_UINT out_width, T_UINT out_depth) { Measurement::Start("func_SpliT"); From 15b91353f9b8fa8507d94709bb43fbc7a9af910f Mon Sep 17 00:00:00 2001 From: Antonio Date: Tue, 11 Dec 2018 18:45:21 +0900 Subject: [PATCH 11/45] Propagate output data type to last quantized convolution --- dlk/python/dlk/scripts/generate_project.py | 47 +++++++++++----------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index 328c00653..6810229d9 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -45,23 +45,6 @@ ROOT_DIR = path.abspath(path.join(SCRITPS_DIR, '../../..')) -def pass_print(graph: Graph, name=str()): - - gm = GraphMatcher(graph) - - print('--- ', name, '---') - matches = list() - p = Pattern("*") - gm.get_op_type_matches(p, matches) - - for m in matches: - print('Match: ', m.node.name, m.node.op_type, m.node.dimension) - for input_node in m.node.input_nodes: - print(' -> ', input_node.name, input_node.op_type) - - print('---') - - def pass_dot_graph(graph: Graph, filename): dot_script = 'digraph {' @@ -427,6 +410,28 @@ def pass_propagate_datatypes(graph): m.node.dtype = m.node.input_nodes[0].dtype +def pass_propagate_output_type_backward(graph): + + gm = GraphMatcher(graph) + + matches = list() + p = Pattern('*') + + gm.get_op_type_matches(p, matches) + + def find_input(node, otype): + for n in node.input_nodes: + if n.op_type == 'Conv' and n.is_quantized: + n.dtype = otype + return + find_input(n, otype) + + output_node = matches[-1].node + + output_type = output_node.dtype + find_input(output_node, output_type) + + def optimize_graph_step(model: Model, config: Config) -> None: """Optimze graph in the model. 
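[Editorial note, not part of the patch series] PATCH 07-10 above replace the earlier per-channel binary search with a closed-form threshold computation: each output boundary of the activation quantizer (0.5, 1.5, 2.5 for a 2-bit output) is pushed backwards through QTZ_linear_mid_tread_half.de_run and BatchNormalization.de_run, then rescaled by the weight quantizer's scaling factor so that it lands in the integer accumulator space of the convolution. The sketch below mirrors that order of operations for a single channel with made-up parameter values; the names scale, beta, mean, var and scaling_factor stand in for the corresponding graph inputs, and the real pass additionally flips ceil/floor for channels with a negative batch-norm scale (nega_idx) and clamps values that exceed the 13-bit signed range.

import numpy as np

# hypothetical per-channel parameters; the real values come from the graph
scale, beta, mean, var, eps = 1.2, 0.3, 0.05, 0.8, 1e-5   # BatchNormalization inputs
nbit, max_v = 2, 2.0                                       # activation quantizer settings
scaling_factor = 0.7                                       # from the weight quantizer
n = 2 ** nbit - 1                                          # number of thresholds per channel

for boundary in [0.5, 1.5, 2.5]:                           # midpoints between the output levels 0..3
    x = (boundary * max_v) / n                              # QTZ_linear_mid_tread_half.de_run
    x = ((x - beta) / scale) * np.sqrt(var + eps) + mean    # BatchNormalization.de_run
    th = (x * n) / (max_v * scaling_factor)                 # rescale into conv-accumulator space
    th = np.ceil(th) if scaling_factor >= 0 else np.floor(th)
    print(f'boundary {boundary} -> threshold {int(th)}')
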
@@ -441,23 +446,19 @@ def optimize_graph_step(model: Model, config: Config) -> None: """ graph: Graph = model.graph - pass_print(graph, 'Before') pass_dot_graph(graph, '/tmp/original.dot') pass_remove_identities(graph) - pass_print(graph, 'After identity') pass_dot_graph(graph, '/tmp/prune_identities.dot') pass_transpose(graph) - pass_print(graph, 'After transpose') pass_dot_graph(graph, '/tmp/transposed.dot') if config.activate_hard_quantization: pass_propagate_quantization_details_into_conv(graph) - pass_print(graph, 'After propagate') - if config.threshold_skipping: pass_compute_thresholds(graph) + pass_propagate_output_type_backward(graph) pass_pack_weights(graph) pass_quantize_convolutions(graph) @@ -466,8 +467,6 @@ def optimize_graph_step(model: Model, config: Config) -> None: processed_nodes = [] while pass_precompute(graph, processed_nodes=processed_nodes): pass - pass_print(graph, 'After precompute') - pass_dot_graph(graph, '/tmp/final.dot') optim = Optimizer() From 6301b579df07121db3476d84e6cf3052aeed24fe Mon Sep 17 00:00:00 2001 From: nlpng Date: Wed, 12 Dec 2018 08:34:57 +0900 Subject: [PATCH 12/45] Type change has no effect, let's move it around... --- dlk/python/dlk/scripts/generate_project.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index 6810229d9..ef72eaefa 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -458,10 +458,12 @@ def optimize_graph_step(model: Model, config: Config) -> None: pass_propagate_quantization_details_into_conv(graph) if config.threshold_skipping: pass_compute_thresholds(graph) - pass_propagate_output_type_backward(graph) + # pass_propagate_output_type_backward(graph) pass_pack_weights(graph) pass_quantize_convolutions(graph) + if config.threshold_skipping: + pass_propagate_output_type_backward(graph) pass_propagate_datatypes(graph) processed_nodes = [] From 3a9aecf3bb81417eaff0a856e5753d48cf7bb63c Mon Sep 17 00:00:00 2001 From: nlpng Date: Wed, 12 Dec 2018 09:40:50 +0900 Subject: [PATCH 13/45] Fix PEP8s --- dlk/python/dlk/core/operators.py | 2 -- dlk/python/dlk/scripts/generate_project.py | 9 ++------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/dlk/python/dlk/core/operators.py b/dlk/python/dlk/core/operators.py index 082ea8a16..a8b234ac8 100644 --- a/dlk/python/dlk/core/operators.py +++ b/dlk/python/dlk/core/operators.py @@ -775,8 +775,6 @@ def binarizer(self, data: np.ndarray) -> np.ndarray: f'operator {self.op_type} need to implement the binarizer method') - - class QTZ_binary_mean_scaling(Quantizer): """Quantization operator using binary scaling. 
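[Editorial note, not part of the patch series] The reordering in PATCH 12 above is what makes pass_propagate_output_type_backward take effect. That pass only retypes convolutions whose is_quantized flag is already True, and the flag is set by pass_quantize_convolutions, so calling the backward propagation before the quantization pass (as PATCH 11 did) finds no quantized Conv and changes nothing. Running it after pass_quantize_convolutions, and before pass_propagate_datatypes, lets the network's output dtype reach the last quantized convolution.
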
diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index ef72eaefa..410f45f24 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -55,12 +55,6 @@ def pass_dot_graph(graph: Graph, filename): code[node.name] = counter counter += 1 - # for node in graph.operators: - # for input_node in node.input_nodes: - # - # dot_script += '"' + format(code[input_node.name], '04X') + '-' + input_node.op_type + '"' + ' -> ' \ - # + '"' + format(code[node.name], '04X') + '-' + node.op_type + '"' + ';' - for node in graph.operators: shape = '-' @@ -68,7 +62,8 @@ def pass_dot_graph(graph: Graph, filename): shape = 'x'.join(str(x) for x in node.shape) shape += '(' + node.dimension + ')' - dot_script += node.name + '[label=" ' + format(code[node.name], '04X') + '| ' + node.op_type + '| ' + shape + '| ' + node.dtype.cpptype() + '" shape = "record"];' + dot_script += node.name + '[label=" ' + format(code[node.name], '04X') + '| ' + \ + node.op_type + '| ' + shape + '| ' + node.dtype.cpptype() + '" shape = "record"];' for i in node.input_nodes: dot_script += i.name + ' -> ' + node.name + ';' From e6532815ee66203c9354fed0e57855310adb5b94 Mon Sep 17 00:00:00 2001 From: nlpng Date: Wed, 12 Dec 2018 10:07:24 +0900 Subject: [PATCH 14/45] Fix PEP8 again --- dlk/python/dlk/scripts/generate_project.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index 410f45f24..3b5a10709 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -63,7 +63,7 @@ def pass_dot_graph(graph: Graph, filename): shape += '(' + node.dimension + ')' dot_script += node.name + '[label=" ' + format(code[node.name], '04X') + '| ' + \ - node.op_type + '| ' + shape + '| ' + node.dtype.cpptype() + '" shape = "record"];' + node.op_type + '| ' + shape + '| ' + node.dtype.cpptype() + '" shape = "record"];' for i in node.input_nodes: dot_script += i.name + ' -> ' + node.name + ';' @@ -229,8 +229,6 @@ def pass_compute_thresholds(graph): for m in matches: - # TODO: Neil-san, please use this to apply your threshold. 'p' is the path from qtz to conv (both included) - # TODO: Neil-san, you can access to the quantizers thorugh 'conv.a_quantizer' and 'conv.quantizer' p = [m.node] while p[-1].op_type != 'Conv': non_variable_input = [inode for inode in p[-1].input_nodes @@ -245,7 +243,6 @@ def pass_compute_thresholds(graph): quantizer_conv_output_node = p[0] conv_node = p[-1] - # TODO: Neil-san, you should keep this # check if this is a quantized convolution if not conv_node.quantizer or not conv_node.a_quantizer: continue From eaa98571c049c87f33db6314829e2bd177f46103 Mon Sep 17 00:00:00 2001 From: nlpng Date: Wed, 12 Dec 2018 18:14:45 +0900 Subject: [PATCH 15/45] Refactoring optimizer with new passes --- dlk/python/dlk/core/optimizer.py | 857 ++++++++------------- dlk/python/dlk/scripts/generate_project.py | 408 +--------- 2 files changed, 317 insertions(+), 948 deletions(-) diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index 27e03939a..c7a869cf5 100644 --- a/dlk/python/dlk/core/optimizer.py +++ b/dlk/python/dlk/core/optimizer.py @@ -14,632 +14,401 @@ # limitations under the License. 
# ============================================================================= """Module of optimization passes.""" +import math import numpy as np -from core.data_types import DataType, Float32, Float64, Int8, Int16, Int32, Int64, Uint8, Uint16, Uint32, Uint64, \ - Bool, String, QUANTIZED_NOT_PACKED -from core.graph import Graph, GraphRunner -from core.operators import Add, AveragePool, BatchNormalization, Constant, Conv, Identity, Input, \ - MaxPool, Operator, Output, Transpose, Quantizer, QTZ_binary_mean_scaling, QTZ_linear_mid_tread_half, \ - Reshape, Softmax, Relu, Flatten, Dropout, Gemm, SpaceToDepth, QTZ_binary_channel_wise_mean_scaling, ConcatOnDepth,\ - Maximum, DepthToSpace, Split, Variable from typing import Any, Dict, List, Optional, Set, cast -from functools import reduce -from enum import Enum +from core.graph import Graph +from core.graph_pattern_matching import GraphMatcher, Pattern, match_to_execution_list, NodeMatch +from core.operators import Constant, Operator +from core.data_types import Uint32, QUANTIZED_NOT_PACKED +from typing import cast +from collections import defaultdict from modules.packer import Packer -NodeGroup = List[Operator] +def pass_dot_graph(graph: Graph, filename) -> None: -def node_is_add(node: Operator) -> bool: - return node.op_type == 'Add' + dot_script = 'digraph {' + code = {} + counter = 0 + for node in graph.operators: + code[node.name] = counter + counter += 1 -def node_is_conv(node: Operator) -> bool: - return node.op_type == 'Conv' + for node in graph.operators: + shape = '-' + if node.shape: + shape = 'x'.join(str(x) for x in node.shape) + shape += '(' + node.dimension + ')' -def node_is_concat(node: Operator) -> bool: - return node.op_type == 'ConcatV2' + dot_script += node.name + '[label=" ' + format(code[node.name], '04X') + '| ' + \ + node.op_type + '| ' + shape + '| ' + node.dtype.cpptype() + '" shape = "record"];' + for i in node.input_nodes: + dot_script += i.name + ' -> ' + node.name + ';' + dot_script += '}' -def node_is_const(node: Operator) -> bool: - return node.op_type == 'Constant' + with open(filename, 'w') as f: + f.write(dot_script) -def node_is_qconv(node: Operator) -> bool: - return node.op_type == 'Conv' and cast(Conv, node).is_quantized +def pass_remove_identities(graph: Graph) -> None: + gm = GraphMatcher(graph) -def node_is_input(node: Operator) -> bool: - return node.op_type == 'Input' + to_be_removed = list() + matches: List[NodeMatch] = list() + p = Pattern("Identity") + gm.get_op_type_matches(p, matches) + for m in matches: + """skip all identity.""" + in_op = m.node.input_ops['input'] + out_ops = m.node.output_ops['output'] + for out_op in out_ops: + for k, v in out_op.input_ops.items(): + if v == m.node: + # change the output's input to this identity's input + out_op.add_input(k, in_op) + # change the input's output to this identity's output + for k2, v2 in in_op.output_ops.items(): + if m.node in v2: + v2.remove(m.node) + v2.append(out_op) + break + break -def node_is_weight_quantizer(node: Operator) -> bool: - return (node.op_type == 'QTZ_binary_mean_scaling' - or node.op_type == 'QTZ_binary_channel_wise_mean_scaling') + to_be_removed.append(m.node) + for op in to_be_removed: + graph.remove_op(op) -def node_is_activation_quantizer(node: Operator) -> bool: - return node.op_type == 'QTZ_linear_mid_tread_half' +def pass_transpose(graph: Graph) -> None: -class NHWC_Transposer(GraphRunner): - """Transposer of all nodes to NHWC.""" + gm = GraphMatcher(graph) - def _get_permutation(self, dim: str) -> List[int]: - 
"""Create a permutation from the source dimension.""" - assert len(dim) == 4 and set(dim).issubset({'N', 'H', 'W', 'C', 'I', 'O'}), \ - f'illegal dimension found: {dim}' + matches: List[NodeMatch] = list() + p = Pattern("*") + gm.get_op_type_matches(p, matches) - if set(dim) == set('HWIO'): - dim = dim.replace('I', 'C') - dim = dim.replace('O', 'N') + for m in matches: + dim = m.node.dimension + shape = m.node.shape + if len(shape) != 4 or len(dim) != 4 or not set(dim).issubset({'N', 'H', 'W', 'C', 'I', 'O'}): + continue - return list(map(lambda s: dim.index(s), 'NHWC')) + dim = dim.replace('I', 'C') + dim = dim.replace('O', 'N') - def _check_and_transpose(self, node: Operator) -> None: - perm = self._get_permutation(node.dimension) - node.transpose(perm) + permutation = list(map(lambda s: dim.index(s), 'NHWC')) + m.node.transpose(permutation) - def run_backward_input(self, node: Input, **kwargs: Any) -> None: - self._check_and_transpose(node) - def run_backward_constant(self, node: Constant, **kwargs: Any) -> None: - if node.ndims == 4 and set(node.dimension).issubset({'N', 'H', 'W', 'C', 'I', 'O'}): - self._check_and_transpose(node) +def pass_precompute(graph: Graph, processed_nodes) -> bool: - def run_backward_identity(self, node: Identity, **kwargs: Any) -> None: - if node.ndims == 4 and set(node.dimension).issubset({'N', 'H', 'W', 'C', 'I', 'O'}): - self._check_and_transpose(node) + gm = GraphMatcher(graph) - def run_backward_QTZ_binary_mean_scaling(self, node: QTZ_binary_mean_scaling, **kwargs: Any) -> None: - self._check_and_transpose(node) + matches: List[NodeMatch] = list() + p = Pattern('*') + gm.get_op_type_matches(p, matches) - def run_backward_transpose(self, node: Transpose, **kwargs: Any) -> None: - raise NotImplementedError('Transposing Transpose operator is not supported yet.') + processed_before_precompute = len(processed_nodes) - def run_backward_conv(self, node: Conv, **kwargs: Any) -> None: - self._check_and_transpose(node) + for m in matches: + if m.node in processed_nodes: + continue - def run_backward_batch_normalization(self, node: BatchNormalization, **kwargs: Any) -> None: - self._check_and_transpose(node) + # We want operators with inputs + if not m.node.input_nodes: + continue - def run_backward_QTZ_linear_mid_tread_half(self, node: QTZ_linear_mid_tread_half, **kwargs: Any) -> None: - self._check_and_transpose(node) + precomputable = True + for input_node in m.node.input_nodes: + if input_node.op_type != 'Constant': + precomputable = False - def run_backward_max_pool(self, node: MaxPool, **kwargs: Any) -> None: - self._check_and_transpose(node) + if not precomputable: + continue - def run_backward_average_pool(self, node: AveragePool, **kwargs: Any) -> None: - self._check_and_transpose(node) + processed_nodes += m.node.input_nodes + processed_nodes.append(m.node) - def run_backward_SpaceToDepth(self, node: SpaceToDepth, **kwargs: Any) -> None: - self._check_and_transpose(node) + data = m.node.run_forward() - def run_backward_QTZ_binary_channel_wise_mean_scaling( - self, - node: QTZ_binary_channel_wise_mean_scaling, - **kwargs: Any) -> None: - self._check_and_transpose(node) + new_constant = Constant( + m.node.name + '_new', + m.node.dtype, + data, + dimension_format=m.node.dimension + ) - def run_backward_ConcatOnDepth(self, node: ConcatOnDepth, **kwargs: Any) -> None: - self._check_and_transpose(node) + graph.add_op(new_constant) - def run_backward_Maximum(self, node: Maximum, **kwargs: Any) -> None: - self._check_and_transpose(node) + 
new_constant.add_outputs(m.node.output_ops) + for output_name, consumer_list in m.node.output_ops.items(): + for consumer_node in consumer_list: + for input_name, input_node in consumer_node.input_ops.items(): + if input_node == m.node: + consumer_node.add_input(input_name, new_constant) + break + return len(processed_nodes) > processed_before_precompute - def run_backward_DepthToSpace(self, node: DepthToSpace, **kwargs: Any) -> None: - self._check_and_transpose(node) +def pass_propagate_quantization_details_into_conv(graph: Graph) -> None: -class PreComputeRunner(GraphRunner): - """Optimization class that does precomputation and pruning on the graph. + gm = GraphMatcher(graph) - Fron a constant node, this object precomputes as far as possible, and - replaces all precomputed nodes with a newly defined constant node. + matches: List[NodeMatch] = list() + p = Pattern('*') + gm.get_op_type_matches(p, matches) - Additionally, in the hard-quantized mode, this object replaces a - weight-quantizer node and succesive Conv node with a QConv node, and - packs the weight. - """ + qtypes = [ + 'QTZ_binary_mean_scaling', + 'QTZ_linear_mid_tread_half', + 'QTZ_binary_channel_wise_mean_scaling' + ] - _quantized_bitwidth = 1 - _wordsize = 32 + quant_details = defaultdict(list) + for m in matches: + if not m.node.preserve_quantization: + quant_details[m.node.name] = [] + continue - def __init__(self, graph: Graph, hard_quantized: bool = False) -> None: - """Set up internal varibles.""" - self._precomp_dic: Dict[str, bool] = {} - self._nodes_removed: Set[Operator] = set() - self._hard_quantized = hard_quantized - self._quantizers: Dict[str, Quantizer] = {} # the operator name and its quantizer - self._connected_convs: Dict[Operator, List[Conv]] = {} # node name and its connected convolver + if m.node.op_type == 'Conv': + input_node = m.node.input_nodes[0] + weight_node = m.node.input_nodes[1] - super().__init__(graph) + m.node.a_quantizer = [input_node] if input_node.op_type in qtypes else quant_details[input_node.name] + m.node.quantizer = weight_node if weight_node.op_type in qtypes else quant_details[weight_node.name] - def initialize(self, **kwargs: Any) -> None: - qconvs: List[Conv] = kwargs['qconv'] - self._connected_convs = {q: [q] for q in qconvs} + quant_details[m.node.name] = [] + else: + qtzs = [] + for n in m.node.input_nodes: + if n.op_type in qtypes: + qtzs.append(n) + else: + for q in quant_details[n.name]: + qtzs.append(q) + + quant_details[m.node.name] = qtzs if len(qtzs) == len(m.node.input_nodes) else [] + # TODO: check if the quantizers use same n_bits + + +def pass_compute_thresholds(graph: Graph) -> None: + + gm = GraphMatcher(graph) + + matches: List[NodeMatch] = list() + p = Pattern('QTZ_linear_mid_tread_half') + + gm.get_op_type_matches(p, matches) + + for m in matches: + + p = [m.node] + while p[-1].op_type != 'Conv': + non_variable_input = [inode for inode in p[-1].input_nodes + if (not cast(Operator, inode).is_variable and inode.is_monotonic) + or inode.op_type == 'Conv'] + if len(non_variable_input) != 1: + break + p.append(non_variable_input[-1]) + + if p[-1].op_type != 'Conv': + continue + quantizer_conv_output_node = p[0] + conv_node = p[-1] + + # check if this is a quantized convolution + if not conv_node.quantizer or not conv_node.a_quantizer: + continue + + quantizer_conv_weights = conv_node.quantizer + quantizer_conv_weights.run_forward_no_scaling_factor() + scaling_factor = quantizer_conv_weights.scaling_factor + + # Getting the bit and max value + nbits = [] + max_vs = 
[] + for aqtz in conv_node.a_quantizer: + nbits.append(aqtz.nbit) + max_vs.append(aqtz.max_v) + if not (len(set(nbits)) == 1) and not (len(set(max_vs)) == 1): + raise ValueError(f'bits {nbits} or max values {max_vs} are not consistent') + else: + nbit = nbits[0] + max_v = max_vs[0] + + n = 2 ** nbit - 1 + ch = conv_node.channel + # assume that the threshold values will be a 13-bit signed integer + max_th_value = 2 ** 12 - 1 + + # The threshold_table is numpy array that holds the threshold values for all channels + threshold_table = np.empty([ch, n + 1], dtype=np.int32) - def finalize(self, **kwargs: Any) -> None: - """Remove all unused nodes from the graph.""" - for n in self._nodes_removed: - self._graph.remove_op(n) + # Compute threshold (t0, t1, t2) + th_val = [0.5 + i for i in range(n)] + for th_id, th_v in enumerate(th_val): + init_threshold = np.full(ch, th_v, dtype=np.float64) + + # run calculation in reverse order: q -> bn -> scaling + trans_th = {'data': init_threshold} + for op in p[:-1]: + trans_th = op.de_run(**trans_th) + threshold = (trans_th['data'] * np.float64(n)) / (np.float64(max_v) * scaling_factor) - # 1st phase: check which conv the node connects + for ch_id, th_per_ch in enumerate(threshold): + if quantizer_conv_weights.op_type == 'QTZ_binary_channel_wise_mean_scaling': + threshold_table[ch_id, th_id] = int(math.floor(th_per_ch)) \ + if (scaling_factor[ch_id] < 0) ^ (ch_id in trans_th['nega_idx']) \ + else int(math.ceil(th_per_ch)) + else: + threshold_table[ch_id, th_id] = int(math.floor(th_per_ch)) \ + if (scaling_factor < 0) ^ (ch_id in trans_th['nega_idx']) \ + else int(math.ceil(th_per_ch)) - def run_backward_by_default(self, node: Operator, **kwargs: Any) -> None: - outputs = node.output_op_list + # take care of threshold values that are larger than 16-bit signed integer + threshold_table[abs(threshold_table) > max_th_value] = max_th_value - convs: List[Conv] = sum([self._connected_convs[out] for out in outputs if self._connected_convs.get(out)], []) - self._connected_convs[node] = convs + for c in range(ch): + threshold_table[c, -1] = 1 \ + if np.all(threshold_table[c, 1:-1] > threshold_table[c, :-2], axis=0) else -1 + # Applying the magic number + if np.all(threshold_table[c, 1:-1] == threshold_table[c, :-2], axis=0): + threshold_table[c, -1] = 2 - def run_backward_conv(self, node: Conv, **kwargs: Any) -> None: - pass # do nothing, as all (quantized) conv node is already registered to self._connected_convs + # Put the thresholds into list + conv_node.thresholds = threshold_table.flatten().tolist() - # 2nd phase: precompute and prune + # Disconnect batchnorm and the quantizer + out_ops = quantizer_conv_output_node.output_ops['output'] + for output_node in out_ops: + for input_name, input_node in output_node.input_ops.items(): + if input_node == quantizer_conv_output_node: + output_node.add_input(input_name, conv_node) - def _has_precompute_value(self, op: Operator) -> bool: - """Return True if the operator has precompute value.""" - return self._precomp_dic[op.name] + conv_node.remove_output('Y') + conv_node.add_outputs({'Y': out_ops}) - def _is_prunable(self, op: Operator) -> bool: - """Return True if op can be prunable.""" - return self._has_precompute_value(op) and op.op_type != 'Constant' - def _prune(self, node: Operator) -> None: - """Prune the node and its inputs.""" - # prune inputs - for i in node.input_ops.values(): - if i not in self._nodes_removed: - self._prune(i) +def pass_pack_weights(graph: Graph) -> None: - # prune itself - 
self._nodes_removed.add(node) + gm = GraphMatcher(graph) - def _precompute_or_prune_inputs(self, node: Operator) -> None: - """Precompute itself or prune the input nodes. + quantization_types = [ + 'QTZ_binary_mean_scaling', + 'QTZ_linear_mid_tread_half', + 'QTZ_binary_channel_wise_mean_scaling' + ] - If all input has precompute value, then make the node precompute. - Otherwise, all prunable input nodes are pruned and substituted with - a new constant node. - """ - ops: List[Operator] = [node.input_ops[i] for i in node.input_names if node.input_ops.get(i)] - ops_have_precomp_values = list(map(lambda x: self._has_precompute_value(x), ops)) - ops_are_prunable = list(map(lambda x: self._is_prunable(x), ops)) - ops_are_in_quantized = list(map(lambda x: x.name in self._quantizers.keys(), ops)) + matches: List[NodeMatch] = list() + p = Pattern('Conv') - # check which input node can be pruned - if reduce(lambda x, y: x and y, ops_have_precomp_values): # all input has concrete values - node.run_forward() - self._precomp_dic[node.name] = True # this node can be pruned - if reduce(lambda x, y: x or y, ops_are_in_quantized): # some input operator to be quantized exists - quantizers = {op.name: self._quantizers[op.name] for op in ops if self._quantizers.get(op.name)} - if len(quantizers) > 1: - ValueError(f'{node.name}: multiple quantized inputs with {node.op_type} are not supported.') - self._quantizers[node.name] = list(quantizers.values())[0] + gm.get_op_type_matches(p, matches) - else: - self._precomp_dic[node.name] = False + # TODO: pass proper parameters + packer = Packer(1, 32) - # prune input opetarots - for key, op in zip(node.input_names, ops): - if self._is_prunable(op): - # get scaling factor if it is to be quantized but not in hard quantization mode - scaling = 1 if self._quantizers.get(op.name) is None \ - else self._quantizers[op.name].scaling_factor + for m in matches: + conv_node = m.node - extra_dims = tuple(np.ones((len(op.data.shape) - len(scaling.shape)), dtype=np.int32)) - scaling = scaling.reshape(scaling.shape + extra_dims) + # check if this is a quantized convolution + if not conv_node.quantizer or not conv_node.a_quantizer: + continue - # creates new constant - new_op = Constant( - op.name + '_new', - op.dtype, - op.data * scaling, - dimension_format=op.dimension - ) + weight_quantizer = conv_node.quantizer + if weight_quantizer.op_type not in quantization_types: + continue - # replace and prune the old operators - node.add_input(key, new_op) - self._graph.add_op(new_op) - self._prune(op) + # Quantize the weights + weight_quantizer.run_forward() + op_data = weight_quantizer.binarizer(weight_quantizer.data) + data = packer.run(op_data.astype(np.float32), weight_quantizer.dimension) - def run_forward_by_default(self, node: Operator, **kwargs: Any) -> None: - self._precompute_or_prune_inputs(node) + quantized_constant = Constant( + weight_quantizer.name + '_new', + Uint32(), + data, + packed=True, + actual_shape=weight_quantizer.shape + ) - def run_forward_input(self, node: Input, **kwargs: Any) -> None: - self._precomp_dic[node.name] = False + graph.add_op(quantized_constant) - def run_forward_constant(self, node: Constant, **kwargs: Any) -> None: - self._precomp_dic[node.name] = True + quantized_constant.add_outputs(weight_quantizer.output_ops) + for output_name, consumer_list in weight_quantizer.output_ops.items(): + for consumer_node in consumer_list: + for input_name, input_node in consumer_node.input_ops.items(): + if input_node == weight_quantizer: + 
consumer_node.add_input(input_name, quantized_constant) + break - def run_forward_identity(self, node: Identity, **kwargs: Any) -> None: - """skip all identity.""" - in_op = node.input_ops['input'] - out_ops = node.output_ops['output'] - for out_op in out_ops: - for k, v in out_op.input_ops.items(): - if v == node: - # change the output's input to this identity's input - out_op.add_input(k, in_op) - # change the input's output to this identity's output - for k2, v2 in in_op.output_ops.items(): - if node in v2: - v2.remove(node) - v2.append(out_op) - break - break - def run_forward_QTZ_binary_mean_scaling(self, node: QTZ_binary_mean_scaling, **kwargs: Any) -> None: - in_op = node.input_ops['input'] +def pass_quantize_convolutions(graph: Graph) -> None: - # if it can be precomputed - if self._has_precompute_value(in_op): - node.run_forward() - self._precomp_dic[node.name] = True # this node can be pruned - self._quantizers[node.name] = node # add itself as the quantizer - else: - self._precomp_dic[node.name] = False - - def run_forward_conv(self, node: Conv, **kwargs: Any) -> None: - ops: List[Operator] = [node.input_ops[i] for i in node.input_names if node.input_ops.get(i)] - - if self._hard_quantized and node in kwargs['qconv']: - # data is to be packed - ops_have_precomp_values = list(map(lambda x: self._has_precompute_value(x), ops)) - ops_are_prunable = list(map(lambda x: self._is_prunable(x), ops)) - - # check which input node can be pruned - if reduce(lambda x, y: x and y, ops_have_precomp_values): # all input has concrete values - node.run_forward() - self._precomp_dic[node.name] = True # this node can be pruned - quantizers = {op.name: self._quantizers[op.name] for op in ops if self._quantizers.get(op.name)} - if len(quantizers) > 1: - ValueError(f'{node.name}: multiple quantized inputs with {node.op_type} are not supported.') - self._quantizers[node.name] = list(quantizers.values())[0] - - else: # an input (must be weight) is to be quantized and packed - self._precomp_dic[node.name] = False - node.is_quantized = True - packer = Packer(self._quantized_bitwidth, self._wordsize) - quantizers = {op.name: self._quantizers[op.name] for op in ops if self._quantizers.get(op.name)} - if len(quantizers) > 1: - ValueError(f'{node.name}: multiple quantized inputs with {node.op_type} are not supported.') - node.quantizer = list(quantizers.values())[0] - - for key, op in zip(node.input_names, ops): - - if self._is_prunable(op): - shape = op.shape - op_data = node.quantizer.binarizer(op.data) - data = packer.run(op_data.astype(np.float32), op.dimension) - dtype = op.dtype - new_op = Constant( - op.name + '_new', - dtype, - data, - packed=True, - actual_shape=shape - ) - node.add_input(key, new_op) - self._graph.add_op(new_op) - self._prune(op) + gm = GraphMatcher(graph) - else: - self._precompute_or_prune_inputs(node) - - def run_forward_QTZ_binary_channel_wise_mean_scaling( - self, - node: QTZ_binary_channel_wise_mean_scaling, - **kwargs: Any) -> None: - in_op = node.input_ops['input'] - - # if it can be precomputed - if self._has_precompute_value(in_op): - node.run_forward() - self._precomp_dic[node.name] = True # this node can be pruned - self._quantizers[node.name] = node # add itself as the quantizer - else: - self._precomp_dic[node.name] = False - - -class DTypeChanger(GraphRunner): - """Optimization class that changes dypes. - - This runner must run before PrecomputeRunner. 
- """ - - class Path(Enum): - INPUT = 1, - WEIGHT = 2, - OTHER = 3 - - _packed_dtype = {Path.INPUT: QUANTIZED_NOT_PACKED(), Path.WEIGHT: Uint32(), Path.OTHER: Float32()} - _a_quantizers = {'QTZ_linear_mid_tread_half'} - _w_quantizers = {'QTZ_binary_mean_scaling', 'QTZ_binary_channel_wise_mean_scaling'} - _conv = {'Conv'} - - def __init__(self, graph: Graph) -> None: - """Set up internal varibles.""" - self._output_convs: Dict[Operator, List[Conv]] = {} - self._packed_input_path: Dict[str, Any] = {} - - super().__init__(graph, depth_first=False) - - # 1st phase: check nodes which dtype must be changed - - def _check_dtype_state(self, node: Operator) -> None: - """checks the state of each node regarding dtype. - - - whether the node is after conv and before activation quantizer - - whether the node is after activation and before conv - """ - outputs = node.output_op_list - convs: List[Conv] = sum([self._output_convs[out] for out in outputs if self._output_convs.get(out) is not None], - []) - - # determine the path of node is input or weight or others - path = self.Path.WEIGHT - for out in outputs: - p = self._packed_input_path[out.name] if out.op_type not in self._conv \ - else self.Path.INPUT if node == out.input_ops['X'] \ - else self.Path.WEIGHT - if path == self.Path.WEIGHT: - path = p - elif path == p: - pass - else: # output have different paths - ValueError('multiple outputs must have the same kind of paths.') - - is_not_before_a_quantizer = reduce(lambda x, y: x and y, - [out.op_type not in self._a_quantizers for out in outputs]) - if convs and is_not_before_a_quantizer: - self._output_convs[node] = convs - - self._packed_input_path[node.name] = path - - def run_backward_by_default(self, node: Operator, **kwargs: Any) -> None: - self._check_dtype_state(node) - - def run_backward_output(self, node: Output, **kwargs: Any) -> None: - self._packed_input_path[node.name] = self.Path.OTHER - - def run_backward_conv(self, node: Conv, **kwargs: Any) -> None: - self._output_convs[node] = [node] - - # 2nd phase: change data type - - def turn(self, **kwargs: Any) -> None: - """Set up qconv list""" - output_convs: List[Conv] = sum(list(self._output_convs.values()), []) - for conv in output_convs: - # get all ascendants of conv - ascendants = [k for k in self._output_convs.keys() if conv in self._output_convs[k]] - - # whether some weight quantizer is in ascendants - wqtz_in_asc = reduce(lambda x, y: x or y, - list(map(lambda n: n.op_type in self._w_quantizers, ascendants))) - # whether some activation quantizer is in ascendants - aqtz_in_asc = reduce(lambda x, y: x or y, - list(map(lambda n: n.op_type in self._a_quantizers, ascendants))) - # if both, add conv to the list - if wqtz_in_asc and aqtz_in_asc: - kwargs['qconv'].add(conv) - - def _set_dtype(self, node: Operator, qconv: List[Conv]) -> None: - def before_qconv() -> bool: - """Return if the node is before a quantized convolver""" - convs: List[Conv] = self._output_convs[node] if self._output_convs.get(node) else [] - # consistency check - is_qconv: List[bool] = list(map(lambda x: x in qconv, convs)) - all_is_qconv = reduce(lambda x, y: x and y, is_qconv, True) - some_is_qconv = reduce(lambda x, y: x or y, is_qconv, False) - assert convs == [] or (all_is_qconv == some_is_qconv), \ - f'{node.name} connects to both of a quantized convolver and non-quantized one.' 
- - return convs != [] and all_is_qconv - - def get_dtype() -> Optional[DataType]: - """Return dtype along with which path the node is on: 'input' or 'weight' of a conv""" - path = self._packed_input_path.get(node.name) - return self._packed_dtype[path] if path is not None else None - - dtype = get_dtype() - conv = self._output_convs.get(node) - if dtype is not None and before_qconv(): - node.dtype = dtype - - def run_forward_by_default(self, node: Operator, **kwargs: Any) -> None: - self._set_dtype(node, kwargs['qconv']) - - -class ApplyThresholdSkipping(GraphRunner): - """Optimization class that perform threshold skipping. - - This runner perform threshold skipping with BFS for DLK graph. - Run graphrunner backward to acquire graph info, and run forward - to compute the thresholds skip batchnorm and activation quantizer - with thresholding function. - """ - - def __init__(self, graph: Graph) -> None: - self._aqtz_aqtz: Dict[Operator, List[Operator]] = {} - self._qconv_qconv: Dict[Conv, List] = {} - super().__init__(graph, depth_first=False) - - def _apply_threshold_skipping(self, op_lst: List[Operator]) -> None: - """Performs Conv thresholds computation and skipping.""" - - transitions: Dict[int, Operator] = {} - start, finish = [None, None] - for idx, op in enumerate(op_lst): - if node_is_qconv(op): - start = cast(Conv, op) - elif node_is_activation_quantizer(op): - finish = op - transitions[idx] = op - else: - transitions[idx] = op - - if start is not None and finish is not None: - - # def linear_qtz2float(x: np.ndarray, n_value: int, max_value: float) -> np.ndarray: - # real_x = x / np.float64(n_value) * np.float64(max_value) - # return real_x.astype(np.float64) - - # Step 1: Compute thresholds for Convolution operators - # aqtzer = cast(Quantizer, start.a_quantizer[0]) # Activation Quantizers should all have the same bits - # bit = aqtzer.nbit - # max_v = aqtzer.max_v - # if bit is None or max_v is None: - # ValueError(f'activation quantizer of node {start.name} has bit or max value of None') - - # n = 2 ** bit - 1 - # ch = start.channel - # lch = start.input_ops['X'].channel - # k = start.kernel_height * start.kernel_width * lch * n - # qtzer = cast(Quantizer, start.quantizer) - # conv_results = [x for x in range(-k, k + 1, 1)] - # th_tmp = np.empty([ch, n + 1], dtype=np.int32) - # v_now = dict.fromkeys([x for x in range(ch)], 0) - # th_now = 0 - # val_neg_flag = -1 - # val_pos_flag = 1 - # all_transdata: Dict[int, Dict[str, Any]] = {} - - # Step 1-1: initalize thresholds - # for conv_res in conv_results: - # conv_out = np.full(ch, conv_res, dtype=np.float64) - # conv_out *= qtzer.scaling_factor if qtzer.scaling_factor is not None \ - # else ValueError(f'oops Quantizer of node {start.name} has scaling factor of None') - # - # conv_data = linear_qtz2float(conv_out, n, max_v) - # - # trans_data: Dict[str, Any] = {'data': conv_data} - # for idx, op in sorted(transitions.items(), reverse=True): - # trans_data = op.run(**trans_data) - # - # for depth in range(ch): - # init = -k if depth in trans_data['nega_idx'] else k - # th_tmp[depth, :] = init - # - # all_transdata[conv_res] = trans_data - - # Step 1-2: update thresholds - # for conv_res in conv_results: - # trans_data = all_transdata[conv_res] - # qtz_out = trans_data['data'] - # qtz_mu = np.mean(qtz_out) - # if qtz_mu != th_now: - # for depth in range(ch): - # is_negative = depth in trans_data['nega_idx'] - # if v_now.get(depth) != qtz_out[depth]: - # if is_negative: - # th_tmp[depth, abs(n - qtz_out[depth] - 1)] = conv_res - # 
else: - # th_tmp[depth, qtz_out[depth] - 1] = conv_res - # v_now[depth] = qtz_out[depth] - # th_tmp[depth, n] = -1 if is_negative else 1 - # for depth in range(ch): - # constant = reduce(lambda x, y: x and y, - # [th_tmp[depth, i] == th_tmp[depth, i + 1] for i in range(n - 1)]) - # th_tmp[depth, n] = qtz_out[depth] + 2 if constant else th_tmp[depth, n] - # # note: 2 above is a magic number. the result value must not be 1 nor -1. - # th_now = qtz_mu - - # start.thresholds = th_tmp.flatten().tolist() - - # Step 2: Skipping unused operators, e.g. batch normalization, linear activation quantizer - pass - # if start.has_thresholds: - # if start.dtype is not finish.dtype: - # start.dtype = finish.dtype - # for consumers in finish.output_ops.values(): - # for consumer in consumers: - # for idex, y in start.output_ops.items(): - # if not bool(set(consumers) & set(y)): - # start.remove_output(idex) - # start.add_output(idex, consumer) - # - # for indent, v in consumer.input_ops.items(): - # if v == finish: - # consumer.add_input(indent, start) - # break - else: - pass - - def _makeup_skippable(self, node: Operator) -> None: - outputs = node.output_op_list - for out_op in outputs: - for start, lst in self._aqtz_aqtz.items(): - if out_op in lst: - self._aqtz_aqtz[start].append(node) - - def _makeup_aqtz(self, node: Operator) -> None: - outputs = node.output_op_list - for out_op in outputs: - for start, lst in self._qconv_qconv.items(): - if out_op in lst: - self._qconv_qconv[start].append(node) - - def run_backward_QTZ_linear_mid_tread_half(self, node: QTZ_linear_mid_tread_half, **kwargs: Any) -> None: - self._aqtz_aqtz[node] = [node] - self._makeup_aqtz(node) - - def run_backward_by_default(self, node: Operator, **kwargs: Any) -> None: - if node.is_monotonic and not node_is_conv(node): - self._makeup_skippable(node) - self._makeup_aqtz(node) - - def run_backward_conv(self, node: Conv, **kwargs: Any) -> None: - self._makeup_skippable(node) - if node_is_qconv(node): - self._qconv_qconv[node] = [node] - - def run_forward_conv(self, node: Conv, **kwargs: Any) -> None: - bits: List[int] = [] - aqtzers: List[Quantizer] = [] - if node_is_qconv(node): - for x in self._qconv_qconv[node]: - if node_is_activation_quantizer(x): - bits.append(x.nbit) - aqtzers.append(x) - - if not (len(set(bits)) == 1): - ValueError('Values are not consistent') - else: - node.a_quantizer = aqtzers + matches: List[NodeMatch] = list() + p = Pattern('Conv') + gm.get_op_type_matches(p, matches) + + for m in matches: + conv_node = m.node + + # check if this is a quantized convolution + if not conv_node.quantizer or not conv_node.a_quantizer: + continue + + # Mark as quantized convolution + conv_node.is_quantized = True + + # change the output data type of the convolution if thresholds are available + if conv_node.has_thresholds: + conv_node.dtype = QUANTIZED_NOT_PACKED + + # change the output data type of the quantizers + conv_node.quantizer.dtype = Uint32 + for qtz in conv_node.a_quantizer: + qtz.dtype = QUANTIZED_NOT_PACKED + + +def pass_propagate_datatypes(graph) -> None: + + gm = GraphMatcher(graph) + + matches: List[NodeMatch] = list() + p = Pattern('*') + gm.get_op_type_matches(p, matches) - def run_forward_QTZ_linear_mid_tread_half(self, node: QTZ_linear_mid_tread_half, **kwargs: Any) -> None: - self._apply_threshold_skipping(self._aqtz_aqtz[node]) + for m in matches: + if m.node.op_type != 'Conv' and m.node.preserve_quantization: + m.node.dtype = m.node.input_nodes[0].dtype -class Optimizer(object): - """Class of optimization 
classes.""" +def pass_propagate_output_type_backward(graph: Graph) -> None: - def transpose_NHWC(self, graph: Graph) -> Graph: - runner = NHWC_Transposer(graph) - kwargs: Dict[str, Any] = {} - runner.run(**kwargs) - return graph + gm = GraphMatcher(graph) - def precompute(self, graph: Graph, hard_quantized: bool = False) -> Graph: - runner1 = DTypeChanger(graph) - runner2 = PreComputeRunner(graph, hard_quantized=hard_quantized) + matches: List[NodeMatch] = list() + p = Pattern('*') - kwargs: Dict[str, Set[Conv]] = {'qconv': set()} + gm.get_op_type_matches(p, matches) - # run - if hard_quantized: - runner1.run(**kwargs) - runner2.run(**kwargs) + def find_input(node, otype): + for n in node.input_nodes: + if n.op_type == 'Conv' and n.is_quantized: + n.dtype = otype + return + find_input(n, otype) - return graph + output_node = matches[-1].node - def threshold_skipping(self, graph: Graph) -> Graph: - runner1 = ApplyThresholdSkipping(graph) - kwargs: Dict[str, Any] = {} - runner1.run(**kwargs) - return graph + output_type = output_node.dtype + find_input(output_node, output_type) diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index 3b5a10709..f2cff229b 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -20,410 +20,24 @@ - Generate all cpp source headers and other control files like Makefile. """ import click +import utils from os import path -import shutil -import math -import numpy as np from core.config import Config from core.graph import Graph from core.model import Model from core.params import Params -from core.optimizer import Optimizer from code_generater import CodeGenerater from frontend import TensorFlowIO -from core.graph_pattern_matching import GraphMatcher, Pattern, match_to_execution_list -from core.operators import Constant, Operator -from modules.packer import Packer -from core.data_types import Uint32, QUANTIZED_NOT_PACKED -from typing import cast -from collections import defaultdict -import utils +from core.optimizer import pass_dot_graph, pass_remove_identities, pass_transpose, pass_precompute, \ + pass_propagate_quantization_details_into_conv, pass_compute_thresholds, pass_pack_weights, \ + pass_quantize_convolutions, pass_propagate_datatypes, pass_propagate_output_type_backward SCRITPS_DIR = path.abspath(path.dirname(__file__)) DLK_ROOT_DIR = path.abspath(path.join(SCRITPS_DIR, '..')) ROOT_DIR = path.abspath(path.join(SCRITPS_DIR, '../../..')) -def pass_dot_graph(graph: Graph, filename): - - dot_script = 'digraph {' - - code = {} - counter = 0 - for node in graph.operators: - code[node.name] = counter - counter += 1 - - for node in graph.operators: - - shape = '-' - if node.shape: - shape = 'x'.join(str(x) for x in node.shape) - shape += '(' + node.dimension + ')' - - dot_script += node.name + '[label=" ' + format(code[node.name], '04X') + '| ' + \ - node.op_type + '| ' + shape + '| ' + node.dtype.cpptype() + '" shape = "record"];' - for i in node.input_nodes: - dot_script += i.name + ' -> ' + node.name + ';' - - dot_script += '}' - - with open(filename, 'w') as f: - f.write(dot_script) - - -def pass_remove_identities(graph: Graph): - - gm = GraphMatcher(graph) - - to_be_removed = list() - matches = list() - p = Pattern("Identity") - gm.get_op_type_matches(p, matches) - - for m in matches: - """skip all identity.""" - in_op = m.node.input_ops['input'] - out_ops = m.node.output_ops['output'] - for out_op in out_ops: - for k, v in out_op.input_ops.items(): - if v 
== m.node: - # change the output's input to this identity's input - out_op.add_input(k, in_op) - # change the input's output to this identity's output - for k2, v2 in in_op.output_ops.items(): - if m.node in v2: - v2.remove(m.node) - v2.append(out_op) - break - break - - to_be_removed.append(m.node) - - for op in to_be_removed: - graph.remove_op(op) - - -def pass_transpose(graph): - - gm = GraphMatcher(graph) - - matches = list() - p = Pattern("*") - gm.get_op_type_matches(p, matches) - - for m in matches: - dim = m.node.dimension - shape = m.node.shape - if len(shape) != 4 or len(dim) != 4 or not set(dim).issubset({'N', 'H', 'W', 'C', 'I', 'O'}): - continue - - dim = dim.replace('I', 'C') - dim = dim.replace('O', 'N') - - permutation = list(map(lambda s: dim.index(s), 'NHWC')) - m.node.transpose(permutation) - - -def pass_precompute(graph, processed_nodes): - - gm = GraphMatcher(graph) - - matches = list() - p = Pattern('*') - gm.get_op_type_matches(p, matches) - - processed_before_precompute = len(processed_nodes) - - for m in matches: - if m.node in processed_nodes: - continue - - # We want operators with inputs - if not m.node.input_nodes: - continue - - precomputable = True - for input_node in m.node.input_nodes: - if input_node.op_type != 'Constant': - precomputable = False - - if not precomputable: - continue - - processed_nodes += m.node.input_nodes - processed_nodes.append(m.node) - - data = m.node.run_forward() - - new_constant = Constant( - m.node.name + '_new', - m.node.dtype, - data, - dimension_format=m.node.dimension - ) - - graph.add_op(new_constant) - - new_constant.add_outputs(m.node.output_ops) - for output_name, consumer_list in m.node.output_ops.items(): - for consumer_node in consumer_list: - for input_name, input_node in consumer_node.input_ops.items(): - if input_node == m.node: - consumer_node.add_input(input_name, new_constant) - break - - return len(processed_nodes) > processed_before_precompute - - -def pass_propagate_quantization_details_into_conv(graph): - - gm = GraphMatcher(graph) - - matches = list() - p = Pattern('*') - gm.get_op_type_matches(p, matches) - - qtypes = [ - 'QTZ_binary_mean_scaling', - 'QTZ_linear_mid_tread_half', - 'QTZ_binary_channel_wise_mean_scaling' - ] - - quant_details = defaultdict(list) - for m in matches: - if not m.node.preserve_quantization: - quant_details[m.node.name] = [] - continue - - if m.node.op_type == 'Conv': - input_node = m.node.input_nodes[0] - weight_node = m.node.input_nodes[1] - - m.node.a_quantizer = [input_node] if input_node.op_type in qtypes else quant_details[input_node.name] - m.node.quantizer = weight_node if weight_node.op_type in qtypes else quant_details[weight_node.name] - - quant_details[m.node.name] = [] - else: - qtzs = [] - for n in m.node.input_nodes: - if n.op_type in qtypes: - qtzs.append(n) - else: - for q in quant_details[n.name]: - qtzs.append(q) - - quant_details[m.node.name] = qtzs if len(qtzs) == len(m.node.input_nodes) else [] - # TODO: check if the quantizers use same n_bits - - -def pass_compute_thresholds(graph): - - gm = GraphMatcher(graph) - - matches = list() - p = Pattern('QTZ_linear_mid_tread_half') - - gm.get_op_type_matches(p, matches) - - for m in matches: - - p = [m.node] - while p[-1].op_type != 'Conv': - non_variable_input = [inode for inode in p[-1].input_nodes - if (not cast(Operator, inode).is_variable and inode.is_monotonic) - or inode.op_type == 'Conv'] - if len(non_variable_input) != 1: - break - p.append(non_variable_input[-1]) - - if p[-1].op_type != 'Conv': - continue 
- quantizer_conv_output_node = p[0] - conv_node = p[-1] - - # check if this is a quantized convolution - if not conv_node.quantizer or not conv_node.a_quantizer: - continue - - quantizer_conv_weights = conv_node.quantizer - quantizer_conv_weights.run_forward_no_scaling_factor() - scaling_factor = quantizer_conv_weights.scaling_factor - - # TODO: make '3' function on the number of bits of the number of bits - n = 2 ** 2 - 1 - ch = conv_node.channel - # assume that the threshold values will be a 13-bit signed integer - max_th_value = 2 ** 12 - 1 - - # The threshold_table is ndarray that holds the threshold values for all channels - threshold_table = np.empty([ch, n + 1], dtype=np.int32) - - # Compute threshold (t0, t1, t2) - for th_id, th_v in enumerate([0.5, 1.5, 2.5]): - init_threshold = np.full(ch, th_v, dtype=np.float64) - - # run calculation in reverse order: q -> bn -> scaling - trans_th = {'data': init_threshold} - for op in p[:-1]: - trans_th = op.de_run(**trans_th) - threshold = (trans_th['data'] * np.float64(n)) / (np.float64(2.0) * scaling_factor) - - for ch_id, th_per_ch in enumerate(threshold): - if quantizer_conv_weights.op_type == 'QTZ_binary_channel_wise_mean_scaling': - threshold_table[ch_id, th_id] = int(math.floor(th_per_ch)) \ - if (scaling_factor[ch_id] < 0) ^ (ch_id in trans_th['nega_idx']) \ - else int(math.ceil(th_per_ch)) - else: - threshold_table[ch_id, th_id] = int(math.floor(th_per_ch)) \ - if (scaling_factor < 0) ^ (ch_id in trans_th['nega_idx']) \ - else int(math.ceil(th_per_ch)) - - # take care of threshold values that are larger than 16-bit signed integer - threshold_table[abs(threshold_table) > max_th_value] = max_th_value - - for c in range(ch): - threshold_table[c, -1] = 1 \ - if np.all(threshold_table[c, 1:-1] > threshold_table[c, :-2], axis=0) else -1 - # Applying the magic number - if np.all(threshold_table[c, 1:-1] == threshold_table[c, :-2], axis=0): - threshold_table[c, -1] = 2 - - # Put the thresholds into list - conv_node.thresholds = threshold_table.flatten().tolist() - - # TODO: Neil-san, you should keep this - # Disconnect batchnorm and the quantizer - out_ops = quantizer_conv_output_node.output_ops['output'] - for output_node in out_ops: - for input_name, input_node in output_node.input_ops.items(): - if input_node == quantizer_conv_output_node: - output_node.add_input(input_name, conv_node) - - conv_node.remove_output('Y') - conv_node.add_outputs({'Y': out_ops}) - - # TODO: temporary (only for drawing better graphs) - # batch_norm_node.remove_input('X') - - -def pass_pack_weights(graph): - - gm = GraphMatcher(graph) - - quantization_types = [ - 'QTZ_binary_mean_scaling', - 'QTZ_linear_mid_tread_half', - 'QTZ_binary_channel_wise_mean_scaling' - ] - - matches = list() - p = Pattern('Conv') - - gm.get_op_type_matches(p, matches) - - # TODO: pass proper parameters - packer = Packer(1, 32) - - for m in matches: - conv_node = m.node - - # check if this is a quantized convolution - if not conv_node.quantizer or not conv_node.a_quantizer: - continue - - weight_quantizer = conv_node.quantizer - if weight_quantizer.op_type not in quantization_types: - continue - - # Quantize the weights - weight_quantizer.run_forward() - op_data = weight_quantizer.binarizer(weight_quantizer.data) - data = packer.run(op_data.astype(np.float32), weight_quantizer.dimension) - - quantized_constant = Constant( - weight_quantizer.name + '_new', - Uint32(), - data, - packed=True, - actual_shape=weight_quantizer.shape - ) - - graph.add_op(quantized_constant) - - 
quantized_constant.add_outputs(weight_quantizer.output_ops) - for output_name, consumer_list in weight_quantizer.output_ops.items(): - for consumer_node in consumer_list: - for input_name, input_node in consumer_node.input_ops.items(): - if input_node == weight_quantizer: - consumer_node.add_input(input_name, quantized_constant) - break - - -def pass_quantize_convolutions(graph): - - gm = GraphMatcher(graph) - - matches = list() - p = Pattern('Conv') - gm.get_op_type_matches(p, matches) - - for m in matches: - conv_node = m.node - - # check if this is a quantized convolution - if not conv_node.quantizer or not conv_node.a_quantizer: - continue - - # Mark as quantized convolution - conv_node.is_quantized = True - - # change the output data type of the convolution if thresholds are available - if conv_node.has_thresholds: - conv_node.dtype = QUANTIZED_NOT_PACKED - - # change the output data type of the quantizers - conv_node.quantizer.dtype = Uint32 - for qtz in conv_node.a_quantizer: - qtz.dtype = QUANTIZED_NOT_PACKED - - -def pass_propagate_datatypes(graph): - - gm = GraphMatcher(graph) - - matches = list() - p = Pattern('*') - gm.get_op_type_matches(p, matches) - - for m in matches: - if m.node.op_type != 'Conv' and m.node.preserve_quantization: - m.node.dtype = m.node.input_nodes[0].dtype - - -def pass_propagate_output_type_backward(graph): - - gm = GraphMatcher(graph) - - matches = list() - p = Pattern('*') - - gm.get_op_type_matches(p, matches) - - def find_input(node, otype): - for n in node.input_nodes: - if n.op_type == 'Conv' and n.is_quantized: - n.dtype = otype - return - find_input(n, otype) - - output_node = matches[-1].node - - output_type = output_node.dtype - find_input(output_node, output_type) - - def optimize_graph_step(model: Model, config: Config) -> None: """Optimze graph in the model. 
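For orientation, after this series optimize_graph_step becomes a flat sequence of pass functions instead of a GraphRunner-based Optimizer. The sketch below is assembled from the hunks that follow; the exact position of pass_propagate_datatypes is not visible in the context lines and is assumed.

    def optimize_graph_step(model: Model, config: Config) -> None:
        graph: Graph = model.graph

        pass_remove_identities(graph)
        pass_transpose(graph)

        if config.activate_hard_quantization:
            pass_propagate_quantization_details_into_conv(graph)
            if config.threshold_skipping:
                pass_compute_thresholds(graph)
            pass_pack_weights(graph)
            pass_quantize_convolutions(graph)

        pass_propagate_datatypes(graph)  # position assumed, not shown in the hunks

        processed_nodes = []
        while pass_precompute(graph, processed_nodes=processed_nodes):
            pass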
@@ -437,20 +51,13 @@ def optimize_graph_step(model: Model, config: Config) -> None: """ graph: Graph = model.graph - - pass_dot_graph(graph, '/tmp/original.dot') - pass_remove_identities(graph) - pass_dot_graph(graph, '/tmp/prune_identities.dot') - pass_transpose(graph) - pass_dot_graph(graph, '/tmp/transposed.dot') if config.activate_hard_quantization: pass_propagate_quantization_details_into_conv(graph) if config.threshold_skipping: pass_compute_thresholds(graph) - # pass_propagate_output_type_backward(graph) pass_pack_weights(graph) pass_quantize_convolutions(graph) @@ -461,13 +68,6 @@ def optimize_graph_step(model: Model, config: Config) -> None: processed_nodes = [] while pass_precompute(graph, processed_nodes=processed_nodes): pass - pass_dot_graph(graph, '/tmp/final.dot') - - optim = Optimizer() - # optim.transpose_NHWC(graph) - # optim.precompute(graph, config.activate_hard_quantization) - # if config.threshold_skipping: - # optim.threshold_skipping(graph) def generate_code_step(model: Model, config: Config) -> None: From 0a36c2b865a8ed05a61474357177c9d0944be64b Mon Sep 17 00:00:00 2001 From: Antonio Date: Thu, 13 Dec 2018 09:54:48 +0900 Subject: [PATCH 16/45] Remove visited properties --- dlk/python/dlk/core/graph_pattern_matching.py | 28 +++++---- dlk/python/dlk/core/operators.py | 8 --- dlk/python/dlk/core/optimizer.py | 57 +++++++------------ dlk/python/dlk/scripts/generate_project.py | 2 +- 4 files changed, 37 insertions(+), 58 deletions(-) diff --git a/dlk/python/dlk/core/graph_pattern_matching.py b/dlk/python/dlk/core/graph_pattern_matching.py index 8ae3840a2..35f0a76a1 100644 --- a/dlk/python/dlk/core/graph_pattern_matching.py +++ b/dlk/python/dlk/core/graph_pattern_matching.py @@ -31,10 +31,8 @@ def __init__(self): self.inputs = list() -def sort_graph(graph, exec_list): - for node in graph.operators: - node.visited = False - +def sort_graph(graph): + exec_list = list() input_nodes = list() for node in graph.operators: input_nodes += [n.name for n in node.input_nodes] @@ -44,18 +42,24 @@ def sort_graph(graph, exec_list): if node not in input_nodes: output_nodes.append(node) + visited = {} + for node in graph.operators: + visited[node.name] = False + for node in output_nodes: - top_order(node, exec_list) + top_order(node, exec_list, visited) + + return exec_list -def top_order(output_node, exec_list): - if output_node.visited: +def top_order(output_node, exec_list, visited): + if visited[output_node.name]: return for input_node in output_node.input_nodes: - top_order(input_node, exec_list) + top_order(input_node, exec_list, visited) exec_list.append(output_node) - output_node.visited = True + visited[output_node.name] = True def match_to_execution_list(match, execution_list): @@ -67,7 +71,7 @@ def match_to_execution_list(match, execution_list): class GraphMatcher: def __init__(self, input_graph=Graph()): self.graph_node_list = list() - sort_graph(input_graph, self.graph_node_list) + self.graph_node_list = sort_graph(input_graph) self._node_map = {node.name: node for node in self.graph_node_list} @@ -76,7 +80,8 @@ def record_matched_nodes(self, match, matched_nodes): for input_node in match.inputs: self.record_matched_nodes(input_node, matched_nodes) - def get_op_type_matches(self, pattern, matches): + def get_op_type_matches(self, pattern): + matches = list() matched_nodes = set() for node in self.graph_node_list: if node in matched_nodes: @@ -86,6 +91,7 @@ def get_op_type_matches(self, pattern, matches): if self.does_op_type_match(node, pattern, matched_nodes, match): 
self.record_matched_nodes(match, matched_nodes) matches.append(match) + return matches def does_op_type_match(self, node, pattern, previously_matched_nodes, match): if node.name in previously_matched_nodes: diff --git a/dlk/python/dlk/core/operators.py b/dlk/python/dlk/core/operators.py index a8b234ac8..19ae9b863 100644 --- a/dlk/python/dlk/core/operators.py +++ b/dlk/python/dlk/core/operators.py @@ -560,14 +560,6 @@ def infer_shape(cls, lists: Dict[str, List[int]], format: str, input_formats: Li """ raise NotImplementedError(f'operator {cls.__name__} cannot infer its shape.') - @property - def visited(self) -> bool: - return self._visited - - @visited.setter - def visited(self, v: Bool) -> None: - self._visited = v - @property def preserve_quantization(self) -> bool: return False diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index c7a869cf5..70d4087a5 100644 --- a/dlk/python/dlk/core/optimizer.py +++ b/dlk/python/dlk/core/optimizer.py @@ -17,10 +17,8 @@ import math import numpy as np -from typing import Any, Dict, List, Optional, Set, cast - from core.graph import Graph -from core.graph_pattern_matching import GraphMatcher, Pattern, match_to_execution_list, NodeMatch +from core.graph_pattern_matching import GraphMatcher, Pattern from core.operators import Constant, Operator from core.data_types import Uint32, QUANTIZED_NOT_PACKED from typing import cast @@ -59,11 +57,10 @@ def pass_dot_graph(graph: Graph, filename) -> None: def pass_remove_identities(graph: Graph) -> None: gm = GraphMatcher(graph) + p = Pattern("Identity") + matches = gm.get_op_type_matches(p) to_be_removed = list() - matches: List[NodeMatch] = list() - p = Pattern("Identity") - gm.get_op_type_matches(p, matches) for m in matches: """skip all identity.""" @@ -91,10 +88,8 @@ def pass_remove_identities(graph: Graph) -> None: def pass_transpose(graph: Graph) -> None: gm = GraphMatcher(graph) - - matches: List[NodeMatch] = list() p = Pattern("*") - gm.get_op_type_matches(p, matches) + matches = gm.get_op_type_matches(p) for m in matches: dim = m.node.dimension @@ -112,10 +107,8 @@ def pass_transpose(graph: Graph) -> None: def pass_precompute(graph: Graph, processed_nodes) -> bool: gm = GraphMatcher(graph) - - matches: List[NodeMatch] = list() p = Pattern('*') - gm.get_op_type_matches(p, matches) + matches = gm.get_op_type_matches(p) processed_before_precompute = len(processed_nodes) @@ -162,10 +155,8 @@ def pass_precompute(graph: Graph, processed_nodes) -> bool: def pass_propagate_quantization_details_into_conv(graph: Graph) -> None: gm = GraphMatcher(graph) - - matches: List[NodeMatch] = list() p = Pattern('*') - gm.get_op_type_matches(p, matches) + matches = gm.get_op_type_matches(p) qtypes = [ 'QTZ_binary_mean_scaling', @@ -203,11 +194,8 @@ def pass_propagate_quantization_details_into_conv(graph: Graph) -> None: def pass_compute_thresholds(graph: Graph) -> None: gm = GraphMatcher(graph) - - matches: List[NodeMatch] = list() p = Pattern('QTZ_linear_mid_tread_half') - - gm.get_op_type_matches(p, matches) + matches = gm.get_op_type_matches(p) for m in matches: @@ -301,6 +289,8 @@ def pass_compute_thresholds(graph: Graph) -> None: def pass_pack_weights(graph: Graph) -> None: gm = GraphMatcher(graph) + p = Pattern('Conv') + matches = gm.get_op_type_matches(p) quantization_types = [ 'QTZ_binary_mean_scaling', @@ -308,13 +298,9 @@ def pass_pack_weights(graph: Graph) -> None: 'QTZ_binary_channel_wise_mean_scaling' ] - matches: List[NodeMatch] = list() - p = Pattern('Conv') - - 
gm.get_op_type_matches(p, matches) - - # TODO: pass proper parameters - packer = Packer(1, 32) + word_size = 32 + weight_bitwidth = 1 + packer = Packer(weight_bitwidth, word_size) for m in matches: conv_node = m.node @@ -323,6 +309,7 @@ def pass_pack_weights(graph: Graph) -> None: if not conv_node.quantizer or not conv_node.a_quantizer: continue + # Check if we support this kind of quantizer weight_quantizer = conv_node.quantizer if weight_quantizer.op_type not in quantization_types: continue @@ -332,6 +319,7 @@ def pass_pack_weights(graph: Graph) -> None: op_data = weight_quantizer.binarizer(weight_quantizer.data) data = packer.run(op_data.astype(np.float32), weight_quantizer.dimension) + # Create the new constant with the quantized weights quantized_constant = Constant( weight_quantizer.name + '_new', Uint32(), @@ -340,8 +328,8 @@ def pass_pack_weights(graph: Graph) -> None: actual_shape=weight_quantizer.shape ) + # Add the constant to the graph and connect the new constant graph.add_op(quantized_constant) - quantized_constant.add_outputs(weight_quantizer.output_ops) for output_name, consumer_list in weight_quantizer.output_ops.items(): for consumer_node in consumer_list: @@ -354,10 +342,8 @@ def pass_pack_weights(graph: Graph) -> None: def pass_quantize_convolutions(graph: Graph) -> None: gm = GraphMatcher(graph) - - matches: List[NodeMatch] = list() p = Pattern('Conv') - gm.get_op_type_matches(p, matches) + matches = gm.get_op_type_matches(p) for m in matches: conv_node = m.node @@ -382,10 +368,8 @@ def pass_quantize_convolutions(graph: Graph) -> None: def pass_propagate_datatypes(graph) -> None: gm = GraphMatcher(graph) - - matches: List[NodeMatch] = list() p = Pattern('*') - gm.get_op_type_matches(p, matches) + matches = gm.get_op_type_matches(p) for m in matches: if m.node.op_type != 'Conv' and m.node.preserve_quantization: @@ -395,11 +379,8 @@ def pass_propagate_datatypes(graph) -> None: def pass_propagate_output_type_backward(graph: Graph) -> None: gm = GraphMatcher(graph) - - matches: List[NodeMatch] = list() p = Pattern('*') - - gm.get_op_type_matches(p, matches) + matches = gm.get_op_type_matches(p) def find_input(node, otype): for n in node.input_nodes: @@ -408,7 +389,7 @@ def find_input(node, otype): return find_input(n, otype) + # propagate output data type to the last quantized convolution output_node = matches[-1].node - output_type = output_node.dtype find_input(output_node, output_type) diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index f2cff229b..f29ad24ed 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -29,7 +29,7 @@ from core.params import Params from code_generater import CodeGenerater from frontend import TensorFlowIO -from core.optimizer import pass_dot_graph, pass_remove_identities, pass_transpose, pass_precompute, \ +from core.optimizer import pass_remove_identities, pass_transpose, pass_precompute, \ pass_propagate_quantization_details_into_conv, pass_compute_thresholds, pass_pack_weights, \ pass_quantize_convolutions, pass_propagate_datatypes, pass_propagate_output_type_backward From 925044ee6d132cf2847dbf2098f591e868b40f2a Mon Sep 17 00:00:00 2001 From: Antonio Date: Thu, 13 Dec 2018 10:01:01 +0900 Subject: [PATCH 17/45] Make find patterns easier --- dlk/python/dlk/core/graph_pattern_matching.py | 5 +++ dlk/python/dlk/core/optimizer.py | 42 +++++-------------- 2 files changed, 15 insertions(+), 32 deletions(-) diff --git 
a/dlk/python/dlk/core/graph_pattern_matching.py b/dlk/python/dlk/core/graph_pattern_matching.py index 35f0a76a1..dd003581c 100644 --- a/dlk/python/dlk/core/graph_pattern_matching.py +++ b/dlk/python/dlk/core/graph_pattern_matching.py @@ -31,6 +31,11 @@ def __init__(self): self.inputs = list() +def find_pattern(graph, pattern): + gm = GraphMatcher(graph) + return gm.get_op_type_matches(pattern) + + def sort_graph(graph): exec_list = list() input_nodes = list() diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index 70d4087a5..f2d488ab3 100644 --- a/dlk/python/dlk/core/optimizer.py +++ b/dlk/python/dlk/core/optimizer.py @@ -18,7 +18,7 @@ import numpy as np from core.graph import Graph -from core.graph_pattern_matching import GraphMatcher, Pattern +from core.graph_pattern_matching import find_pattern, Pattern from core.operators import Constant, Operator from core.data_types import Uint32, QUANTIZED_NOT_PACKED from typing import cast @@ -55,11 +55,8 @@ def pass_dot_graph(graph: Graph, filename) -> None: def pass_remove_identities(graph: Graph) -> None: - - gm = GraphMatcher(graph) p = Pattern("Identity") - matches = gm.get_op_type_matches(p) - + matches = find_pattern(graph, p) to_be_removed = list() for m in matches: @@ -86,10 +83,8 @@ def pass_remove_identities(graph: Graph) -> None: def pass_transpose(graph: Graph) -> None: - - gm = GraphMatcher(graph) p = Pattern("*") - matches = gm.get_op_type_matches(p) + matches = find_pattern(graph, p) for m in matches: dim = m.node.dimension @@ -105,11 +100,8 @@ def pass_transpose(graph: Graph) -> None: def pass_precompute(graph: Graph, processed_nodes) -> bool: - - gm = GraphMatcher(graph) p = Pattern('*') - matches = gm.get_op_type_matches(p) - + matches = find_pattern(graph, p) processed_before_precompute = len(processed_nodes) for m in matches: @@ -153,11 +145,8 @@ def pass_precompute(graph: Graph, processed_nodes) -> bool: def pass_propagate_quantization_details_into_conv(graph: Graph) -> None: - - gm = GraphMatcher(graph) p = Pattern('*') - matches = gm.get_op_type_matches(p) - + matches = find_pattern(graph, p) qtypes = [ 'QTZ_binary_mean_scaling', 'QTZ_linear_mid_tread_half', @@ -192,10 +181,8 @@ def pass_propagate_quantization_details_into_conv(graph: Graph) -> None: def pass_compute_thresholds(graph: Graph) -> None: - - gm = GraphMatcher(graph) p = Pattern('QTZ_linear_mid_tread_half') - matches = gm.get_op_type_matches(p) + matches = find_pattern(graph, p) for m in matches: @@ -287,11 +274,8 @@ def pass_compute_thresholds(graph: Graph) -> None: def pass_pack_weights(graph: Graph) -> None: - - gm = GraphMatcher(graph) p = Pattern('Conv') - matches = gm.get_op_type_matches(p) - + matches = find_pattern(graph, p) quantization_types = [ 'QTZ_binary_mean_scaling', 'QTZ_linear_mid_tread_half', @@ -340,10 +324,8 @@ def pass_pack_weights(graph: Graph) -> None: def pass_quantize_convolutions(graph: Graph) -> None: - - gm = GraphMatcher(graph) p = Pattern('Conv') - matches = gm.get_op_type_matches(p) + matches = find_pattern(graph, p) for m in matches: conv_node = m.node @@ -366,10 +348,8 @@ def pass_quantize_convolutions(graph: Graph) -> None: def pass_propagate_datatypes(graph) -> None: - - gm = GraphMatcher(graph) p = Pattern('*') - matches = gm.get_op_type_matches(p) + matches = find_pattern(graph, p) for m in matches: if m.node.op_type != 'Conv' and m.node.preserve_quantization: @@ -377,10 +357,8 @@ def pass_propagate_datatypes(graph) -> None: def pass_propagate_output_type_backward(graph: Graph) -> None: - - gm = 
GraphMatcher(graph) p = Pattern('*') - matches = gm.get_op_type_matches(p) + matches = find_pattern(graph, p) def find_input(node, otype): for n in node.input_nodes: From 33754eab5c6fc8a516483a2f9ec00d3985e3fd89 Mon Sep 17 00:00:00 2001 From: Antonio Date: Thu, 13 Dec 2018 13:15:37 +0900 Subject: [PATCH 18/45] Delete disconnected nodes from graph and stop using graph runner on code generation --- dlk/python/dlk/core/graph.py | 9 +++--- dlk/python/dlk/core/graph_pattern_matching.py | 14 +++++++--- dlk/python/dlk/core/optimizer.py | 28 ++++++++++++++++--- dlk/python/dlk/scripts/generate_project.py | 2 +- 4 files changed, 40 insertions(+), 13 deletions(-) diff --git a/dlk/python/dlk/core/graph.py b/dlk/python/dlk/core/graph.py index 0ff8fc0c4..b659f91e1 100644 --- a/dlk/python/dlk/core/graph.py +++ b/dlk/python/dlk/core/graph.py @@ -23,6 +23,8 @@ Relu, Flatten, Dropout, Gemm, SpaceToDepth, Mul, QTZ_binary_channel_wise_mean_scaling, ConcatOnDepth, Maximum, \ DepthToSpace, Split +from core.graph_pattern_matching import sort_graph, find_pattern, Pattern + class Graph(object): """Graph class. This class was formerly named as 'Nodes'.""" @@ -109,10 +111,9 @@ def consts(self) -> List[Operator]: @property def non_variables(self) -> List[Operator]: - kwargs: Dict[str, List[Operator]] = {'node_list': []} - sorter = NodesSorter(self) - sorter.run(**kwargs) - return [node for node in kwargs['node_list'] if not cast(Operator, node).is_variable] + node_list = sort_graph(self) + node_list = [node for node in node_list if not cast(Operator, node).is_variable] + return node_list def find_node_by_op_type(self, op_type: str) -> List[Operator]: """Find nodes which op_type is specified by the argument. diff --git a/dlk/python/dlk/core/graph_pattern_matching.py b/dlk/python/dlk/core/graph_pattern_matching.py index dd003581c..95862d260 100644 --- a/dlk/python/dlk/core/graph_pattern_matching.py +++ b/dlk/python/dlk/core/graph_pattern_matching.py @@ -15,9 +15,6 @@ # ============================================================================= """Graph pattern matching module.""" -from core.operators import Operator -from core.graph import Graph - class Pattern: def __init__(self, op=str(), inputs=list()): @@ -67,6 +64,15 @@ def top_order(output_node, exec_list, visited): visited[output_node.name] = True +def get_nodes_in_branch(starting_node, stop_node, node_list): + if starting_node == stop_node: + return + node_list.append(starting_node) + + for node in starting_node.input_nodes: + get_nodes_in_branch(node, stop_node, node_list) + + def match_to_execution_list(match, execution_list): for input_node in match.inputs: match_to_execution_list(input_node, execution_list) @@ -74,7 +80,7 @@ def match_to_execution_list(match, execution_list): class GraphMatcher: - def __init__(self, input_graph=Graph()): + def __init__(self, input_graph): self.graph_node_list = list() self.graph_node_list = sort_graph(input_graph) diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index f2d488ab3..ec259e2c9 100644 --- a/dlk/python/dlk/core/optimizer.py +++ b/dlk/python/dlk/core/optimizer.py @@ -18,7 +18,7 @@ import numpy as np from core.graph import Graph -from core.graph_pattern_matching import find_pattern, Pattern +from core.graph_pattern_matching import find_pattern, Pattern, get_nodes_in_branch from core.operators import Constant, Operator from core.data_types import Uint32, QUANTIZED_NOT_PACKED from typing import cast @@ -103,6 +103,7 @@ def pass_precompute(graph: Graph, processed_nodes) -> bool: p = 
Pattern('*') matches = find_pattern(graph, p) processed_before_precompute = len(processed_nodes) + to_be_removed = [] for m in matches: if m.node in processed_nodes: @@ -131,9 +132,11 @@ def pass_precompute(graph: Graph, processed_nodes) -> bool: data, dimension_format=m.node.dimension ) - graph.add_op(new_constant) + # get nodes to be removed after being disconnected + get_nodes_in_branch(m.node, None, to_be_removed) + new_constant.add_outputs(m.node.output_ops) for output_name, consumer_list in m.node.output_ops.items(): for consumer_node in consumer_list: @@ -141,6 +144,10 @@ def pass_precompute(graph: Graph, processed_nodes) -> bool: if input_node == m.node: consumer_node.add_input(input_name, new_constant) break + + for op in to_be_removed: + graph.remove_op(op) + return len(processed_nodes) > processed_before_precompute @@ -184,8 +191,8 @@ def pass_compute_thresholds(graph: Graph) -> None: p = Pattern('QTZ_linear_mid_tread_half') matches = find_pattern(graph, p) + to_be_removed = [] for m in matches: - p = [m.node] while p[-1].op_type != 'Conv': non_variable_input = [inode for inode in p[-1].input_nodes @@ -262,16 +269,23 @@ def pass_compute_thresholds(graph: Graph) -> None: # Put the thresholds into list conv_node.thresholds = threshold_table.flatten().tolist() - # Disconnect batchnorm and the quantizer + # get nodes to be removed after being disconnected + get_nodes_in_branch(quantizer_conv_output_node, conv_node, to_be_removed) + + # Disconnect the outputs of the quantizer out_ops = quantizer_conv_output_node.output_ops['output'] for output_node in out_ops: for input_name, input_node in output_node.input_ops.items(): if input_node == quantizer_conv_output_node: output_node.add_input(input_name, conv_node) + # Disconnect the outputs of the conv conv_node.remove_output('Y') conv_node.add_outputs({'Y': out_ops}) + for op in to_be_removed: + graph.remove_op(op) + def pass_pack_weights(graph: Graph) -> None: p = Pattern('Conv') @@ -285,6 +299,7 @@ def pass_pack_weights(graph: Graph) -> None: word_size = 32 weight_bitwidth = 1 packer = Packer(weight_bitwidth, word_size) + to_be_removed = [] for m in matches: conv_node = m.node @@ -312,6 +327,9 @@ def pass_pack_weights(graph: Graph) -> None: actual_shape=weight_quantizer.shape ) + # get nodes to be removed after being disconnected + get_nodes_in_branch(weight_quantizer, None, to_be_removed) + # Add the constant to the graph and connect the new constant graph.add_op(quantized_constant) quantized_constant.add_outputs(weight_quantizer.output_ops) @@ -322,6 +340,8 @@ def pass_pack_weights(graph: Graph) -> None: consumer_node.add_input(input_name, quantized_constant) break + for op in to_be_removed: + graph.remove_op(op) def pass_quantize_convolutions(graph: Graph) -> None: p = Pattern('Conv') diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index f29ad24ed..8bd0a2d63 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -31,7 +31,7 @@ from frontend import TensorFlowIO from core.optimizer import pass_remove_identities, pass_transpose, pass_precompute, \ pass_propagate_quantization_details_into_conv, pass_compute_thresholds, pass_pack_weights, \ - pass_quantize_convolutions, pass_propagate_datatypes, pass_propagate_output_type_backward + pass_quantize_convolutions, pass_propagate_datatypes, pass_propagate_output_type_backward, pass_dot_graph SCRITPS_DIR = path.abspath(path.dirname(__file__)) DLK_ROOT_DIR = 
path.abspath(path.join(SCRITPS_DIR, '..')) From 93013ede4741e0ec4dca48d37c30ee83984daa72 Mon Sep 17 00:00:00 2001 From: Antonio Date: Thu, 13 Dec 2018 14:18:55 +0900 Subject: [PATCH 19/45] Deleted old code --- dlk/python/dlk/core/graph.py | 403 -------------------------- dlk/python/dlk/core/operators.py | 73 ----- dlk/python/dlk/frontend/tensorflow.py | 9 +- dlk/python/dlk/frontend/tf_export.py | 359 ----------------------- 4 files changed, 1 insertion(+), 843 deletions(-) delete mode 100644 dlk/python/dlk/frontend/tf_export.py diff --git a/dlk/python/dlk/core/graph.py b/dlk/python/dlk/core/graph.py index b659f91e1..6c569b041 100644 --- a/dlk/python/dlk/core/graph.py +++ b/dlk/python/dlk/core/graph.py @@ -167,406 +167,3 @@ def check_nodes(self) -> bool: return True - def accept(self, runner: 'GraphRunner', **kwargs: Any) -> None: - """Accept a graph runner and run it from the output node.""" - if TYPE_CHECKING: - import core.graph as gp - runner.initialize(**kwargs) - - if runner.depth_first: # depth first traversal - outputs = self.get_outputs() - for out in outputs: - out.accept(cast('gp.GraphRunner', runner), **kwargs) - - else: # breadth first traversal - # backward 1st - next = self.get_outputs() - if runner.is_lazy: - while next: - def get_visit_list(ops: List[Operator]) -> List[bool]: - return list(map(lambda n: runner.is_visited(cast(Operator, n)), ops)) - - def and_all(list: List[bool]) -> bool: - return functools.reduce(lambda x, y: x and y, list, True) - - # devide the `next` list into executables and non-executables - execs = [op for op in next if and_all(get_visit_list(op.output_op_list))] - non_execs = [op for op in next if not and_all(get_visit_list(op.output_op_list))] - - # if there is no executable operators, terminate this loop - if execs == []: - names = list(map(lambda x: x.name, non_execs)) - raise AssertionError(f'dead lock happened. {names} cannot run.') - - # execute - next = non_execs - for op in execs: - next += op.accept_backward(cast('gp.GraphRunner', runner), **kwargs) - else: - for op in next: - next += op.accept_backward(cast('gp.GraphRunner', runner), **kwargs) - - # turn - runner.turn(**kwargs) - - # forward run - next = self.get_inputs() + self.consts - if runner.is_lazy: - while next: - def get_inputs(op: Operator) -> List[Operator]: - return list(op.input_ops.values()) - - def get_visit_list(ops: List[Operator]) -> List[bool]: - return list(map(lambda n: not runner.is_visited(cast(Operator, n)), ops)) - - def and_all(list: List[bool]) -> bool: - return functools.reduce(lambda x, y: x and y, list, True) - - # devide the `next` list into executables and non-executables - execs = [op for op in next if and_all(get_visit_list(get_inputs(op)))] - non_execs = [op for op in next if not and_all(get_visit_list(get_inputs(op)))] - - # if there is no executable operators, terminate this loop - if execs == []: - names = list(map(lambda x: x.name, non_execs)) - raise AssertionError(f'dead lock happened. {names} cannot run.') - - # execute - next = non_execs - for op in execs: - next += op.accept_forward(cast('gp.GraphRunner', runner), **kwargs) - else: - for op in next: - next += op.accept_forward(cast('gp.GraphRunner', runner), **kwargs) - - runner.finalize(**kwargs) - - -class GraphRunner(object): - """Visitor class of a graph.""" - - def __init__(self, graph: Graph, depth_first: bool = True, lazy: bool = True) -> None: - """Set up the graph runner. - - Parameters - ---------- - graph : Graph - the graph to be traversed. 
- - depth_first : bool - a flag that represents if the running is done in a depth first manner. - Otherwise, this runner runs in a breadth first manner. It defaults to - True, i.e. a depth first traversal. - - lazy : bool - True if this runner runs in a lazy mode. This means all operator waits - for the traversal until the predecessors are traversed. - This flag is valid only in breadth-first mode. In the depth-first mode, - this is naturally true. - """ - self._graph = graph - self._visited: Set[str] = set() - self._dfs = depth_first - self._is_lazy = lazy - - def run(self, **kwargs: Any) -> None: - """Run this runner on the graph.""" - self._graph.accept(self, **kwargs) - - @property - def visited(self) -> Set[str]: - return set(self._visited) - - def visit(self, op: Operator) -> None: - self._visited.add(op.name) - - def unvisit(self, op: Operator) -> None: - self._visited.remove(op.name) - - def is_visited(self, node: Operator) -> bool: - return node.name in self._visited - - @property - def depth_first(self) -> bool: - """Returns True if this runs in a depth-first manner. - - Otherwise, this runs in a breadth-first manner. - """ - return self._dfs - - @property - def is_lazy(self) -> bool: - """Returns True if this runs in a lazy mode, i.e. all node waits until all of its predecessors are traversed. - - This flag is valide only in the breadth-first mode. - """ - return self._is_lazy - - def initialize(self, **kwargs: Any) -> None: - """Initialize the running. - - This method is called when the run starts. - """ - pass - - def turn(self, **kwargs: Any) -> None: - """Turn from backward to forward. - - This method is called only when the run is in a breadth-first manner. - """ - pass - - def finalize(self, **kwargs: Any) -> None: - """Finalize the running. - - This method is called when the run finishes. 
- """ - pass - - def run_backward_by_default(self, node: Operator, **kwargs: Any) -> None: - pass - - def run_forward_by_default(self, node: Operator, **kwargs: Any) -> None: - pass - - def run_backward_input(self, node: Input, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_input(self, node: Input, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_constant(self, node: Constant, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_constant(self, node: Constant, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_output(self, node: Output, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_output(self, node: Output, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_identity(self, node: Identity, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_identity(self, node: Identity, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_QTZ_binary_mean_scaling(self, node: QTZ_binary_mean_scaling, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_QTZ_binary_mean_scaling(self, node: QTZ_binary_mean_scaling, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_transpose(self, node: Transpose, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_transpose(self, node: Transpose, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_conv(self, node: Conv, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_conv(self, node: Conv, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_batch_normalization(self, node: BatchNormalization, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_batch_normalization(self, node: BatchNormalization, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_QTZ_linear_mid_tread_half(self, node: QTZ_linear_mid_tread_half, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_QTZ_linear_mid_tread_half(self, node: QTZ_linear_mid_tread_half, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_add(self, node: Add, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_add(self, node: Add, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_max_pool(self, node: MaxPool, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_max_pool(self, node: MaxPool, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_average_pool(self, node: AveragePool, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_average_pool(self, node: AveragePool, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_reshape(self, node: Reshape, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_reshape(self, node: Reshape, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_softmax(self, node: Softmax, **kwargs: Any) -> None: 
- self.run_backward_by_default(node, **kwargs) - - def run_forward_softmax(self, node: Softmax, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_relu(self, node: Relu, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_relu(self, node: Relu, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_flatten(self, node: Flatten, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_flatten(self, node: Flatten, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_dropout(self, node: Dropout, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_dropout(self, node: Dropout, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_gemm(self, node: Gemm, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_gemm(self, node: Gemm, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_SpaceToDepth(self, node: SpaceToDepth, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_SpaceToDepth(self, node: SpaceToDepth, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_mul(self, node: Mul, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_mul(self, node: Mul, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_QTZ_binary_channel_wise_mean_scaling( - self, - node: QTZ_binary_channel_wise_mean_scaling, - **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_QTZ_binary_channel_wise_mean_scaling( - self, - node: QTZ_binary_channel_wise_mean_scaling, - **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_ConcatOnDepth(self, node: ConcatOnDepth, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_ConcatOnDepth(self, node: ConcatOnDepth, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_Maximum(self, node: Maximum, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_Maximum(self, node: Maximum, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_DepthToSpace(self, node: DepthToSpace, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_DepthToSpace(self, node: DepthToSpace, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_Split(self, node: Split, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_Split(self, node: Split, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - -class NodesSorter(GraphRunner): - """Class for sorting the nodes of a graph - - It will sort the nodes of a graph in topological order - """ - - def run_forward_input(self, node: Input, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_constant(self, node: Constant, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_output(self, node: Output, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_identity(self, node: Identity, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def 
run_forward_QTZ_binary_mean_scaling(self, node: QTZ_binary_mean_scaling, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_transpose(self, node: Transpose, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_conv(self, node: Conv, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_batch_normalization(self, node: BatchNormalization, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_QTZ_linear_mid_tread_half(self, node: QTZ_linear_mid_tread_half, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_add(self, node: Add, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_max_pool(self, node: MaxPool, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_average_pool(self, node: AveragePool, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_reshape(self, node: Reshape, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_softmax(self, node: Softmax, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_relu(self, node: Relu, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_flatten(self, node: Flatten, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_dropout(self, node: Dropout, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_gemm(self, node: Gemm, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_SpaceToDepth(self, node: SpaceToDepth, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_mul(self, node: Mul, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_QTZ_binary_channel_wise_mean_scaling( - self, - node: QTZ_binary_channel_wise_mean_scaling, - **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_ConcatOnDepth(self, node: ConcatOnDepth, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_Maximum(self, node: Maximum, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_DepthToSpace(self, node: DepthToSpace, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_Split(self, node: Split, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - diff --git a/dlk/python/dlk/core/operators.py b/dlk/python/dlk/core/operators.py index 19ae9b863..9f059fb35 100644 --- a/dlk/python/dlk/core/operators.py +++ b/dlk/python/dlk/core/operators.py @@ -25,7 +25,6 @@ from .data_types import * if TYPE_CHECKING: - from core.graph import GraphRunner import core.operators as ops Ops = Dict[str, 'Operator'] @@ -475,82 +474,10 @@ def run_forward(self) -> np.ndarray: raise NotImplementedError( f'operator {self.op_type} does not have runtime implemenatation yet.') - def accept(self, runner: 'GraphRunner', **kwargs: Any) -> None: - """Accept the graph runner and dispatch. - - This should not be accessed directly, but be called inside. - A bit conplicated use of the visitor pattern. - - runner : GraphRunner - Runner that runs through the graph from outputs to inputs (go backward), - then runs again from inputs to outputs (go forward). - - **kwargs : Any - Any keyward arguments that can be referred and updated during the run. 
- - """ - if runner.is_visited(cast('ops.Operator', self)): - return - - # run backward - self._dispatch_backward(runner, **kwargs) - - # go inside the inputs - for i in self.input_ops.values(): - i.accept(runner, **kwargs) - - # run forward - self._dispatch_forward(runner, **kwargs) - - # record visit - runner.visit(cast('ops.Operator', self)) - - def accept_backward(self, runner: 'GraphRunner', **kwargs: Any) -> List['Operator']: - """Accept the graph runner and dispatch for backward traversal, in a breadth-first .""" - if runner.is_visited(cast('ops.Operator', self)): - return [] - - # run backward - self._dispatch_backward(runner, **kwargs) - - # record visit - runner.visit(cast('ops.Operator', self)) - - # return its inputs as next accepters - return list(self._input_ops.values()) - - def accept_forward(self, runner: 'GraphRunner', **kwargs: Any) -> List['Operator']: - """Accept the graph runner and dispatch for forward traversal, in a breadth-first .""" - - # Note that all 'is_visited' flag is inverted, as this is already used in the backward run - if not runner.is_visited(cast('ops.Operator', self)): - return [] - - # run forward - self._dispatch_forward(runner, **kwargs) - - # record (un)visit - runner.unvisit(cast('ops.Operator', self)) - - # return its outputs as next accepters - return self.output_op_list - @property def _dispatch_name(self) -> str: return type(self).__name__.lower() - def _dispatch_backward(self, runner: 'GraphRunner', **kwargs: Any) -> None: - """Dispatch `runner.run_backward_xxx()` inside.""" - method_name = 'run_backward_' + self._dispatch_name - method_body = getattr(runner, method_name) - method_body(self, **kwargs) - - def _dispatch_forward(self, runner: 'GraphRunner', **kwargs: Any) -> None: - """Dispatch `runner.run_forward_xxx()` inside.""" - method_name = 'run_forward_' + self._dispatch_name - method_body = getattr(runner, method_name) - method_body(self, **kwargs) - @classmethod def infer_shape(cls, lists: Dict[str, List[int]], format: str, input_formats: List[str], attrs: Dict[str, Any]) -> List[int]: diff --git a/dlk/python/dlk/frontend/tensorflow.py b/dlk/python/dlk/frontend/tensorflow.py index 9ffef7d84..7a947b63f 100644 --- a/dlk/python/dlk/frontend/tensorflow.py +++ b/dlk/python/dlk/frontend/tensorflow.py @@ -17,11 +17,7 @@ from .base import BaseIO from core.model import Model from plugins.tf import Importer -import tensorflow as tf from tensorflow.core.framework import graph_pb2 -from tensorflow.python.lib.io import file_io -from frontend.tf_export import Exporter - from os import path @@ -59,7 +55,4 @@ def read(self, pb_path: str) -> Model: return model def write(self, model: Model, path: str) -> None: - graph: tf.Graph = Exporter.export_graph(model) - graph_def = graph.as_graph_def(add_shapes=True) - - file_io.atomic_write_string_to_file(path, graph_def.SerializeToString()) + raise NotImplementedError diff --git a/dlk/python/dlk/frontend/tf_export.py b/dlk/python/dlk/frontend/tf_export.py deleted file mode 100644 index 732762fff..000000000 --- a/dlk/python/dlk/frontend/tf_export.py +++ /dev/null @@ -1,359 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2018 The Blueoil Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================= -"""Exporter from DLK to TensorFlow.""" - -import functools -from typing import Any, Dict, List, Optional - -import numpy as np -import tensorflow as tf -from tensorflow.python.framework.function import Defun - -from core import model as dlk -from core.data_types import DataType -from core.graph import Graph as dlk_Graph -from core.graph import GraphRunner -from core.operators import Add, AveragePool, BatchNormalization, Constant, Conv, Identity, Input, \ - MaxPool, Operator, Output, Transpose, QTZ_binary_mean_scaling, QTZ_linear_mid_tread_half, \ - Reshape, Softmax, Relu, Flatten, Dropout, Gemm - -TF_DTYPE_MAP: Dict[str, tf.DType] = { - 'Float16': tf.float16, - 'Float32': tf.float32, - 'Float64': tf.float64, - 'Uint8': tf.uint8, - 'Uint16': tf.uint16, - 'Uint32': None, - 'Uint64': None, - 'Int8': tf.int8, - 'Int16': tf.int16, - 'Int32': tf.int32, - 'Int64': tf.int64, - - 'Bool': tf.bool, - 'String': tf.string, -} - - -class Exporter(GraphRunner): - - @classmethod - def export_graph(cls, model: dlk.Model) -> tf.Graph: - dlk_graph = model.graph - - runner = cls(dlk_graph) - runner.run() - - return runner.tf_graph - - def __init__(self, graph: dlk_Graph) -> None: - self._tf_graph = tf.Graph() - self.tf_ops: Dict[str, tf.Tensor] = {} - self._formats: Dict[str, str] = {} - self._permutation: Dict[str, List[int]] = {} - super().__init__(graph) - - @property - def tf_graph(self) -> tf.Graph: - return self._tf_graph - - # initialize and finalize - - def initialize(self, **kwargs: Any) -> None: - """Set up TF's default graph""" - # self._tf_graph.as_default().__enter__() - - def finalize(self, **kwargs: Any) -> None: - """Release the TF default graph""" - # self._tf_graph.as_default().__exit__(None, None, None) - - # backward run: check the data format and transpose if needed - - def _transpose_weights(self, node: Operator) -> Optional[str]: - given_format = self._formats.get(node.name) - set_format = node.dimension - if given_format and len(given_format) == 4 and given_format != set_format: - perm = [set_format.index(s) for s in given_format] - self._permutation[node.name] = perm - - return given_format - - def run_backward_constant(self, node: Constant, **kwargs: Any) -> None: - self._transpose_weights(node) - - def _transpose_if_not_supported(self, node: Operator) -> None: - if node.dimension not in {'NHWC', 'NCHW'}: - perm = [node.dimension.index(s) for s in 'NHWC'] - self._permutation[node.name] = perm - - def run_backward_identity(self, node: Identity, **kwargs: Any) -> None: - given_format = self._transpose_weights(node) - if given_format: - self._formats[node.input_ops['input'].name] = given_format - - def run_backward_QTZ_binary_mean_scaling(self, node: QTZ_binary_mean_scaling, **kwargs: Any) -> None: - given_format = self._transpose_weights(node) - if given_format: - self._formats[node.input_ops['input'].name] = given_format - - def run_backward_transpose(self, node: Transpose, **kwargs: Any) -> None: - given_format = self._transpose_weights(node) - if given_format: - # change the input's format - 
perm = node.permutation - inv_perm = [perm[i] for i in range(len(perm))] # inverse the perm - transposed_form = functools.reduce( - lambda x, y: x + y, [given_format[i] for i in inv_perm]) - self._formats[node.input_ops['data'].name] = transposed_form - - def run_backward_conv(self, node: Conv, **kwargs: Any) -> None: - # if the format is not supported, change their order - self._transpose_if_not_supported(node) - self._formats[node.input_ops['W'].name] = 'HWCN' - - def run_backward_batch_normalization(self, node: BatchNormalization, **kwargs: Any) -> None: - given_format = self._transpose_weights(node) - if given_format: - self._formats[node.input_ops['X'].name] = given_format - - def run_backward_QTZ_linear_mid_tread_half(self, node: QTZ_linear_mid_tread_half, **kwargs: Any) -> None: - given_format = self._transpose_weights(node) - if given_format: - self._formats[node.input_ops['X'].name] = given_format - - def run_backward_max_pool(self, node: MaxPool, **kwargs: Any) -> None: - # if the format is not supported, change their order - self._transpose_if_not_supported(node) - - def run_backward_average_pool(self, node: AveragePool, **kwargs: Any) -> None: - # if the format is not supported, change their order - self._transpose_if_not_supported(node) - - # forward run: create tf operators - - def _get_tf_dtype(self, dlk_dtype: DataType) -> tf.DType: - dtype = TF_DTYPE_MAP.get(dlk_dtype.name()) - if dtype: - return dtype - else: - raise ValueError(f'dtype {dlk_dtype.name} is not supported.') - - def _get_transposed_or_not(self, node: Operator): - if node.name in self._permutation.keys(): - perm = self._permutation[node.name] - new_shape: List[int] = [node.shape[i] for i in perm] - new_dimension: str = functools.reduce( - lambda x, y: x + y, [node.dimension[i] for i in perm]) - new_data: np.ndarray = node.data.transpose(perm) - return new_shape, new_dimension, new_data - else: - return node.shape, node.dimension, node.data - - def run_forward_input(self, node: Input, **kwargs: Any) -> None: - new_shape, _, _ = self._get_transposed_or_not(node) - with self._tf_graph.as_default(): - x = tf.placeholder(self._get_tf_dtype(node.dtype), shape=new_shape, name=node.name) - self.tf_ops[node.name] = x - - def run_forward_constant(self, node: Constant, **kwargs: Any) -> None: - new_shape, _, new_data = self._get_transposed_or_not(node) - with self._tf_graph.as_default(): - x = tf.constant(new_data, dtype=self._get_tf_dtype(node.dtype), - shape=new_shape, - name=node.name) - self.tf_ops[node.name] = x - - def run_forward_output(self, node: Output, **kwargs: Any) -> None: - input = self.tf_ops[node.input_ops['input'].name] - with self._tf_graph.as_default(): - x = tf.identity(input, name=node.name) - self.tf_ops[node.name] = x - - def run_forward_identity(self, node: Identity, **kwargs: Any) -> None: - input = self.tf_ops[node.input_ops['input'].name] - with self._tf_graph.as_default(): - x = tf.identity(input, name=node.name) - self.tf_ops[node.name] = x - - def run_forward_QTZ_binary_mean_scaling(self, node: QTZ_binary_mean_scaling, **kwargs: Any) -> None: - - x = self.tf_ops[node.input_ops['input'].name] - - @Defun(self._get_tf_dtype(node.dtype), shape_func=lambda op: [op.inputs[0].get_shape()], - func_name='QTZ_binary_mean_scaling') - def _forward(x): - """Forward. - Args: - x(tf.Variable): The input to be quantized, weights normally. - Returns: - tf.Variable: The quantized input. 
- """ - expectation = tf.reduce_mean(tf.abs(x)) - return tf.sign(x) * expectation - - with self._tf_graph.as_default(): - output = _forward(x, name=node.name) - self.tf_ops[node.name] = output - - def run_forward_transpose(self, node: Transpose, **kwargs: Any) -> None: - perm = node.permutation - a = self.tf_ops[node.input_ops['data'].name] - with self._tf_graph.as_default(): - x = tf.transpose(a, perm, name=node.name) - self.tf_ops[node.name] = x - - def _get_padding2D(self, input_shape: List[int], kernel_shape: List[int]) -> str: - return 'SAME' if input_shape == kernel_shape else 'VALID' - - def run_forward_conv(self, node: Conv, **kwargs: Any) -> None: - if node.dilations != [1, 1, 1, 1]: - ValueError(f'Tensorflow v1.4 does not support dilations {node.dilations}') - - x = self.tf_ops[node.input_ops['X'].name] - w = self.tf_ops[node.input_ops['W'].name] - - inputs = [x, w] - dtypes = [self._get_tf_dtype(node.dtype)] - attrs: Dict[str, Any] = {} - - dim = node.dimension - strides = [1, *(node.strides), 1] if dim == 'NHWC' \ - else [1, 1, *(node.strides)] # dim == 'NCHW' - in_x = node.input_ops['X'] - padding = self._get_padding2D([in_x.height, in_x.width], [node.height, node.width]) - - with self._tf_graph.as_default(): - y = tf.nn.conv2d(x, w, strides, padding, name=node.name, - data_format=dim) - self.tf_ops[node.name] = y - - def run_forward_batch_normalization(self, node: BatchNormalization, **kwargs: Any) -> None: - x = self.tf_ops[node.input_ops['X'].name] - scale = self.tf_ops[node.input_ops['scale'].name] - b = self.tf_ops[node.input_ops['B'].name] - mean = self.tf_ops[node.input_ops['mean'].name] - var = self.tf_ops[node.input_ops['var'].name] - epsilon = node.epsilon - - # param_initializer = {'beta': b, 'gamma': scale, 'moving_mean': mean, 'moving_variance': var} - # test = tf.constant_initializer(10) - - with self._tf_graph.as_default(): - # b = tf.constant_initializer(b) - # scale = tf.constant_initializer(scale) - # mean = tf.constant_initializer(mean) - # var = tf.constant_initializer(var) - # y = tf.layers.batch_normalization(x, beta_initializer=b, gamma_initializer=scale, - # moving_mean_initializer=mean, - # moving_variance_initializer=var, - # epsilon=epsilon, fused=True) - y = tf.nn.fused_batch_norm(x, scale, b, mean=mean, variance=var, epsilon=epsilon, is_training=False, - name=node.name) - # y = tf.nn.batch_normalization(x, mean, var, b, scale, epsilon, name=node.name) - # y = tf.contrib.layers.batch_norm(x, center=True, scale=True, epsilon=epsilon, fused=True) - self.tf_ops[node.name] = y[0] - - def run_forward_QTZ_linear_mid_tread_half(self, node: QTZ_linear_mid_tread_half, **kwargs: Any) -> None: - x = self.tf_ops[node.input_ops['X'].name] - bit = self.tf_ops[node.input_ops['Y'].name] - max_value = self.tf_ops[node.input_ops['Z'].name] - - @Defun(self._get_tf_dtype(node.dtype), tf.int32, tf.float32, - shape_func=lambda op: [op.inputs[0].get_shape()], - func_name='QTZ_linear_mid_tread_half') - def _func(x, bit, max_value): - min_value = 0 - n = tf.pow(2., tf.cast(bit, dtype=tf.float32)) - 1 - value_range = max_value - min_value - - x = tf.clip_by_value(x, min_value, max_value, name="clip") - shifted = (x - min_value) / value_range - quantized = tf.round(shifted * n) / n - unshifted = quantized * value_range + min_value - return unshifted - - with self._tf_graph.as_default(): - output = _func(x, bit, max_value, name=node.name) - self.tf_ops[node.name] = output - - def run_forward_add(self, node: Add, **kwargs: Any) -> None: - x = 
self.tf_ops[node.input_ops['A'].name] - y = self.tf_ops[node.input_ops['B'].name] - - with self._tf_graph.as_default(): - c = tf.add(x, y, name=node.name) - self.tf_ops[node.name] = c - - def run_forward_max_pool(self, node: MaxPool, **kwargs: Any) -> None: - x = self.tf_ops[node.input_ops['X'].name] - ksize = [node.kernel_height, node.kernel_width] - strides = node.strides - in_x = node.input_ops['X'] - padding = self._get_padding2D([in_x.height, in_x.width], [node.height, node.width]) - - with self._tf_graph.as_default(): - y = tf.nn.max_pool(x, ksize, strides, padding, name=node.name) - self.tf_ops[node.name] = y - - def run_forward_average_pool(self, node: AveragePool, **kwargs: Any) -> None: - x = self.tf_ops[node.input_ops['X'].name] - ksize = [node.kernel_height, node.kernel_width] - strides = node.strides - in_x = node.input_ops['X'] - padding = self._get_padding2D([in_x.height, in_x.width], [node.height, node.width]) - - y = tf.nn.avg_pool(x, ksize, strides, padding, name=node.name) - self.tf_ops[node.name] = y - - def run_forward_reshape(self, node: Reshape, **kwargs: Any) -> None: - tensor = self.tf_ops[node.input_ops['data'].name] - shape = node.shape - - with self._tf_graph.as_default(): - reshaped = tf.reshape(tensor, shape, name=node.name) - self.tf_ops[node.name] = reshaped - - def run_forward_softmax(self, node: Softmax, **kwargs: Any) -> None: - logits = self.tf_ops[node.input_ops['input'].name] - - with self._tf_graph.as_default(): - output = tf.nn.softmax(logits, name=node.name) - self.tf_ops[node.name] = output - - def run_forward_relu(self, node: Relu, **kwargs: Any) -> None: - features = self.tf_ops[node.input_ops['X'].name] - - with self._tf_graph.as_default(): - y = tf.nn.relu(features, name=node.name) - self.tf_ops[node.name] = y - - def run_forward_flatten(self, node: Flatten, **kwargs: Any) -> None: - inputs = self.tf_ops[node.input_ops['input'].name] - - with self._tf_graph.as_default(): - output = tf.layers.flatten(inputs, name=node.name) - self.tf_ops[node.name] = output - - def run_forward_dropout(self, node: Dropout, **kwargs: Any) -> None: - x = self.tf_ops[node.input_ops['data'].name] - keep_prob = 1 - node.ratio - - with self._tf_graph.as_default(): - output = tf.nn.dropout(x, keep_prob, name=node.name) - self.tf_ops[node.name] = output - - def run_forward_gemm(self, node: Gemm, **kwargs: Any) -> None: - raise NotImplementedError(f'conversion of {node.op_type} is not supported yet.') From 71fabddea2d34b0c8f292b506fac5056affb9cc0 Mon Sep 17 00:00:00 2001 From: nlpng Date: Fri, 14 Dec 2018 08:41:44 +0900 Subject: [PATCH 20/45] Light fix two tests of optimizer & remove test for exporter --- dlk/python/dlk/core/operators.py | 4 +- dlk/python/dlk/core/optimizer.py | 5 +- dlk/tests/test_optimizer.py | 92 +++++++++++++++++++------------- dlk/tests/test_tf_io.py | 40 -------------- 4 files changed, 60 insertions(+), 81 deletions(-) diff --git a/dlk/python/dlk/core/operators.py b/dlk/python/dlk/core/operators.py index 9f059fb35..40e12e6bd 100644 --- a/dlk/python/dlk/core/operators.py +++ b/dlk/python/dlk/core/operators.py @@ -281,8 +281,10 @@ def add_outputs(self, outputs: OutOps) -> None: All the key names have to be in list `output_names`. 
""" + x = set(outputs.keys()) + y = set(self._output_names) assert set(outputs.keys()).issubset( - set(self._output_names)), "Illegal output names included" + set(self._output_names)), f"Illegal output names {y} included {x}" for n in outputs.keys(): lst = self._output_ops.get(n) if lst is not None: diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index ec259e2c9..6ea1e35b9 100644 --- a/dlk/python/dlk/core/optimizer.py +++ b/dlk/python/dlk/core/optimizer.py @@ -343,6 +343,7 @@ def pass_pack_weights(graph: Graph) -> None: for op in to_be_removed: graph.remove_op(op) + def pass_quantize_convolutions(graph: Graph) -> None: p = Pattern('Conv') matches = find_pattern(graph, p) @@ -359,12 +360,12 @@ def pass_quantize_convolutions(graph: Graph) -> None: # change the output data type of the convolution if thresholds are available if conv_node.has_thresholds: - conv_node.dtype = QUANTIZED_NOT_PACKED + conv_node.dtype = QUANTIZED_NOT_PACKED() # change the output data type of the quantizers conv_node.quantizer.dtype = Uint32 for qtz in conv_node.a_quantizer: - qtz.dtype = QUANTIZED_NOT_PACKED + qtz.dtype = QUANTIZED_NOT_PACKED() def pass_propagate_datatypes(graph) -> None: diff --git a/dlk/tests/test_optimizer.py b/dlk/tests/test_optimizer.py index da4f55e98..0b1aa7d32 100644 --- a/dlk/tests/test_optimizer.py +++ b/dlk/tests/test_optimizer.py @@ -15,13 +15,14 @@ # ============================================================================= """Test file for GraphRunner.""" import unittest +import numpy as np from core.data_types import Float32, Uint32, Int32, QUANTIZED_NOT_PACKED -from core.graph import Graph, GraphRunner -from core.optimizer import Optimizer +from core.graph import Graph +from core.optimizer import pass_remove_identities, pass_transpose, pass_precompute, \ + pass_propagate_quantization_details_into_conv, pass_compute_thresholds, pass_pack_weights, \ + pass_quantize_convolutions, pass_propagate_datatypes, pass_propagate_output_type_backward, pass_dot_graph from core.operators import Add, AveragePool, BatchNormalization, Constant, Conv, Identity, Input, \ MaxPool, Operator, Output, Transpose, QTZ_binary_mean_scaling, QTZ_linear_mid_tread_half, Reshape, Softmax - -import numpy as np from typing import Any, Dict, List, Tuple @@ -36,41 +37,48 @@ def test_precompute1(self) -> None: graph1 = self.create_sample_graph(data1, data2, data3) graph2 = self.create_precompute_graph(data1, data2, data3) - optim = Optimizer() - optim.precompute(graph1) - - # for debug - # from frontend import TensorFlowIO - # from core.model import Model - # import os - # io = TensorFlowIO() - # tmp_dir = os.path.join('tmp') - # if not os.path.exists(tmp_dir): - # os.mkdir(tmp_dir) - # path = os.path.join('tmp', 'test_precompute.pb') - # model = Model() - # model.graph = graph1 - # io.write(model, path) + pass_remove_identities(graph1) + pass_transpose(graph1) - self.assertEqual(graph1, graph2, 'precompute failed.') + pass_propagate_quantization_details_into_conv(graph1) + pass_pack_weights(graph1) + pass_quantize_convolutions(graph1) + pass_propagate_datatypes(graph1) - print("Precompute test #1 passed!") - - def test_precompute2(self) -> None: - """Test code for precompute optimizer.""" - data1 = np.random.rand(3, 2, 2, 3) - data2 = np.random.rand(3, 2, 2, 3) - data3 = np.random.rand(3, 2, 2, 3) - graph1 = self.create_sample_graph(data1, data2, data3) - graph2, scaling1, scaling2 = self.create_quantized_graph(data1, data2, data3) - - optim = Optimizer() - optim.precompute(graph1, 
hard_quantized=True) + processed_nodes = [] + while pass_precompute(graph1, processed_nodes=processed_nodes): + pass self.assertEqual(graph1, graph2, 'precompute failed.') - self.assertAlmostEqual(graph1.get_op('conv2').quantizer.scaling_factor, scaling2) # type: ignore - print("Precompute test #2 passed!") + print("Precompute test #1 passed!") + + # def test_precompute2(self) -> None: + # """Test code for precompute optimizer.""" + # data1 = np.random.rand(3, 2, 2, 3) + # data2 = np.random.rand(3, 2, 2, 3) + # data3 = np.random.rand(3, 2, 2, 3) + # graph1 = self.create_sample_graph(data1, data2, data3) + # graph2, scaling1, scaling2 = self.create_quantized_graph(data1, data2, data3) + # + # # optim = Optimizer() + # # optim.precompute(graph1, hard_quantized=True) + # pass_remove_identities(graph1) + # pass_transpose(graph1) + # + # pass_propagate_quantization_details_into_conv(graph1) + # pass_pack_weights(graph1) + # pass_quantize_convolutions(graph1) + # pass_propagate_datatypes(graph1) + # + # processed_nodes = [] + # while pass_precompute(graph1, processed_nodes=processed_nodes): + # pass + # + # self.assertEqual(graph1, graph2, 'precompute failed.') + # self.assertAlmostEqual(graph1.get_op('conv2').quantizer.scaling_factor, scaling2) # type: ignore + # + # print("Precompute test #2 passed!") def test_precompute3(self) -> None: """Test code for precompute optimizer.""" @@ -80,8 +88,17 @@ def test_precompute3(self) -> None: graph1 = self.create_sample_graph3(data1, data2, data3) graph2, scaling2, scaling3 = self.create_quantized_graph2(data1, data2, data3) - optim = Optimizer() - optim.precompute(graph1, hard_quantized=True) + pass_remove_identities(graph1) + pass_transpose(graph1) + + pass_propagate_quantization_details_into_conv(graph1) + pass_pack_weights(graph1) + pass_quantize_convolutions(graph1) + pass_propagate_datatypes(graph1) + + processed_nodes = [] + while pass_precompute(graph1, processed_nodes=processed_nodes): + pass self.assertEqual(graph1, graph2, 'precompute failed.') self.assertAlmostEqual(graph1.get_op('conv2').quantizer.scaling_factor, scaling2) # type: ignore @@ -95,8 +112,7 @@ def test_transpose_NHWC(self) -> None: graph1 = self.create_sample_graph2(data) graph2 = self.create_transposed_graph(data) - optim = Optimizer() - optim.transpose_NHWC(graph1) + pass_transpose(graph1) self.assertEqual(graph1, graph2, 'transpose to NHWC failed.') diff --git a/dlk/tests/test_tf_io.py b/dlk/tests/test_tf_io.py index f8fa4f99d..c94f901dc 100644 --- a/dlk/tests/test_tf_io.py +++ b/dlk/tests/test_tf_io.py @@ -210,46 +210,6 @@ def match(op1: Operator, op2: Operator) -> bool: return False return True - def test_tf_export(self) -> None: - """Test code for exporting Tensorflow file with TensorflowIO.""" - tmpdir = 'tmp' - tf_path = path.join(tmpdir, - 'test.pb') - if not path.exists(tmpdir): - makedirs(tmpdir) - elif not path.isdir(tmpdir): - raise ValueError('tmp directory is not a directory.') - - model: Model = self.make_simple_model() - - tf_io = TensorFlowIO() - tf_io.write(model, tf_path) - new_model = tf_io.read(tf_path) - - self.assertTrue(self._comparator(model.graph, new_model.graph)) - - print("TF file export test #1 passed!") - - def test_tf_export2(self) -> None: - """Test code for exporting Tensorflow file with TensorflowIO #2.""" - tmpdir = 'tmp' - tf_path = path.join(tmpdir, - 'test2.pb') - if not path.exists(tmpdir): - makedirs(tmpdir) - elif not path.isdir(tmpdir): - raise ValueError('tmp directory is not a directory.') - - model: Model = make_model() - - tf_io = 
TensorFlowIO() - tf_io.write(model, tf_path) - new_model = tf_io.read(tf_path) - - self.assertEqual(model.graph, new_model.graph) - - print("TF file export test #2 passed!") - if __name__ == '__main__': unittest.main() From 5a0014f8a6a9482fb5e404f63a46b38343d57454 Mon Sep 17 00:00:00 2001 From: nlpng Date: Fri, 14 Dec 2018 10:11:01 +0900 Subject: [PATCH 21/45] Fix tests for optimizer --- dlk/python/dlk/core/operators.py | 5 +-- dlk/python/dlk/core/optimizer.py | 2 +- dlk/tests/test_optimizer.py | 58 +++++++++++++++----------------- 3 files changed, 29 insertions(+), 36 deletions(-) diff --git a/dlk/python/dlk/core/operators.py b/dlk/python/dlk/core/operators.py index 40e12e6bd..223066470 100644 --- a/dlk/python/dlk/core/operators.py +++ b/dlk/python/dlk/core/operators.py @@ -281,10 +281,7 @@ def add_outputs(self, outputs: OutOps) -> None: All the key names have to be in list `output_names`. """ - x = set(outputs.keys()) - y = set(self._output_names) - assert set(outputs.keys()).issubset( - set(self._output_names)), f"Illegal output names {y} included {x}" + assert set(outputs.keys()).issubset(set(self._output_names)), f"Illegal output names included" for n in outputs.keys(): lst = self._output_ops.get(n) if lst is not None: diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index 6ea1e35b9..77e08312c 100644 --- a/dlk/python/dlk/core/optimizer.py +++ b/dlk/python/dlk/core/optimizer.py @@ -137,7 +137,7 @@ def pass_precompute(graph: Graph, processed_nodes) -> bool: # get nodes to be removed after being disconnected get_nodes_in_branch(m.node, None, to_be_removed) - new_constant.add_outputs(m.node.output_ops) + new_constant.add_outputs({'output': m.node.output_ops.values()}) for output_name, consumer_list in m.node.output_ops.items(): for consumer_node in consumer_list: for input_name, input_node in consumer_node.input_ops.items(): diff --git a/dlk/tests/test_optimizer.py b/dlk/tests/test_optimizer.py index 0b1aa7d32..0e58d65b5 100644 --- a/dlk/tests/test_optimizer.py +++ b/dlk/tests/test_optimizer.py @@ -15,15 +15,16 @@ # ============================================================================= """Test file for GraphRunner.""" import unittest -import numpy as np from core.data_types import Float32, Uint32, Int32, QUANTIZED_NOT_PACKED -from core.graph import Graph from core.optimizer import pass_remove_identities, pass_transpose, pass_precompute, \ pass_propagate_quantization_details_into_conv, pass_compute_thresholds, pass_pack_weights, \ pass_quantize_convolutions, pass_propagate_datatypes, pass_propagate_output_type_backward, pass_dot_graph +from core.graph import Graph from core.operators import Add, AveragePool, BatchNormalization, Constant, Conv, Identity, Input, \ MaxPool, Operator, Output, Transpose, QTZ_binary_mean_scaling, QTZ_linear_mid_tread_half, Reshape, Softmax -from typing import Any, Dict, List, Tuple + +import numpy as np +from typing import Tuple class TestOptimizer(unittest.TestCase): @@ -40,9 +41,29 @@ def test_precompute1(self) -> None: pass_remove_identities(graph1) pass_transpose(graph1) + processed_nodes = [] + while pass_precompute(graph1, processed_nodes=processed_nodes): + pass + + self.assertEqual(graph1, graph2, 'precompute failed.') + + print("Precompute test #1 passed!") + + def test_precompute2(self) -> None: + """Test code for precompute optimizer.""" + data1 = np.random.rand(3, 2, 2, 3) + data2 = np.random.rand(3, 2, 2, 3) + data3 = np.random.rand(3, 2, 2, 3) + graph1 = self.create_sample_graph(data1, data2, data3) + graph2, 
scaling1, scaling2 = self.create_quantized_graph(data1, data2, data3) + + pass_remove_identities(graph1) + pass_transpose(graph1) + pass_propagate_quantization_details_into_conv(graph1) pass_pack_weights(graph1) pass_quantize_convolutions(graph1) + pass_propagate_datatypes(graph1) processed_nodes = [] @@ -50,35 +71,9 @@ def test_precompute1(self) -> None: pass self.assertEqual(graph1, graph2, 'precompute failed.') + self.assertAlmostEqual(graph1.get_op('conv2').quantizer.scaling_factor, scaling2) # type: ignore - print("Precompute test #1 passed!") - - # def test_precompute2(self) -> None: - # """Test code for precompute optimizer.""" - # data1 = np.random.rand(3, 2, 2, 3) - # data2 = np.random.rand(3, 2, 2, 3) - # data3 = np.random.rand(3, 2, 2, 3) - # graph1 = self.create_sample_graph(data1, data2, data3) - # graph2, scaling1, scaling2 = self.create_quantized_graph(data1, data2, data3) - # - # # optim = Optimizer() - # # optim.precompute(graph1, hard_quantized=True) - # pass_remove_identities(graph1) - # pass_transpose(graph1) - # - # pass_propagate_quantization_details_into_conv(graph1) - # pass_pack_weights(graph1) - # pass_quantize_convolutions(graph1) - # pass_propagate_datatypes(graph1) - # - # processed_nodes = [] - # while pass_precompute(graph1, processed_nodes=processed_nodes): - # pass - # - # self.assertEqual(graph1, graph2, 'precompute failed.') - # self.assertAlmostEqual(graph1.get_op('conv2').quantizer.scaling_factor, scaling2) # type: ignore - # - # print("Precompute test #2 passed!") + print("Precompute test #2 passed!") def test_precompute3(self) -> None: """Test code for precompute optimizer.""" @@ -94,6 +89,7 @@ def test_precompute3(self) -> None: pass_propagate_quantization_details_into_conv(graph1) pass_pack_weights(graph1) pass_quantize_convolutions(graph1) + pass_propagate_datatypes(graph1) processed_nodes = [] From fc27ab14bcd7027805a4b3537ee450fdafc760cb Mon Sep 17 00:00:00 2001 From: nlpng Date: Fri, 14 Dec 2018 10:35:19 +0900 Subject: [PATCH 22/45] Remove test_graphrunner since these is no graphrunner to test --- dlk/tests/test_graphrunner.py | 232 ---------------------------------- dlk/tests/test_optimizer.py | 2 +- 2 files changed, 1 insertion(+), 233 deletions(-) delete mode 100644 dlk/tests/test_graphrunner.py diff --git a/dlk/tests/test_graphrunner.py b/dlk/tests/test_graphrunner.py deleted file mode 100644 index 4eeb91f44..000000000 --- a/dlk/tests/test_graphrunner.py +++ /dev/null @@ -1,232 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2018 The Blueoil Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================= -"""Test file for GraphRunner.""" -import unittest -from core.data_types import Float32 -from core.graph import Graph, GraphRunner -from core.operators import Conv, Input, Output, Constant, Operator -import numpy as np -from typing import Any, Dict, List - - -class TestRunner(GraphRunner): - """Test class of GraphRunner. 
- - This just list up all the op_type of the graph. - """ - - def __init__(self, graph: Graph, depth_first: bool = True, lazy: bool = True) -> None: - self.message: List[str] = [] - super().__init__(graph, depth_first=depth_first, lazy=lazy) - - def initialize(self, **kwargs: Any) -> None: - self.message.append('start running.') - - def finalize(self, **kwargs: Any) -> None: - self.message.append('finished running.') - - # backward: ouput -> inputs - def run_backward_by_default(self, node: Operator, **kwargs: Any) -> None: - kwargs['backward'].append(node.name) - - def run_backward_conv(self, node: Conv, **kwargs: Any) -> None: - self.message.append(f'{node.name}: backward process') - super().run_backward_conv(node, **kwargs) - - # forward: inputs -> output - def run_forward_by_default(self, node: Operator, **kwargs: Any) -> None: - kwargs['forward'].append(node.name) - - def run_forward_conv(self, node: Conv, **kwargs: Any) -> None: - self.message.append(f'{node.name}: forward process') - super().run_forward_conv(node, **kwargs) - - -class TestGraphRunner(unittest.TestCase): - """Test class for GraphRunner.""" - - def test_graphrunner_default(self) -> None: - """Test code for GraphRunner, with the depth-first mode (default).""" - graph = Graph() - self.create_graph(graph) - - kwargs: Dict[str, List[str]] = {'backward': [], 'forward': []} - runner = TestRunner(graph) - runner.run(**kwargs) - - lst1 = ['output', 'conv4', 'input3', 'conv3', 'input2', 'conv2', 'conv1', 'input1', 'weight1', 'weight2'] - self.assertEqual(kwargs['backward'], lst1, - 'backward traversal failed in depth-first mode.') - - lst2 = ['input3', 'input2', 'input1', 'weight1', 'conv1', 'weight2', 'conv2', 'conv3', 'conv4', 'output'] - self.assertEqual(kwargs['forward'], lst2, 'forward traversal failed in depth-first mode.') - - self.assertEqual(runner.message, [ - 'start running.', - 'conv4: backward process', - 'conv3: backward process', - 'conv2: backward process', - 'conv1: backward process', - 'conv1: forward process', - 'conv2: forward process', - 'conv3: forward process', - 'conv4: forward process', - 'finished running.', - ]) - - print("GraphRunner depth-first mode test passed!") - - def test_graphrunner_breadth_first(self) -> None: - """Test code for GraphRunner, with the breadth-first mode.""" - graph = Graph() - self.create_graph(graph) - - kwargs: Dict[str, List[str]] = {'backward': [], 'forward': []} - runner = TestRunner(graph, depth_first=False, lazy=False) - runner.run(**kwargs) - - lst1 = ['output', 'conv4', 'input3', 'conv3', 'input2', 'conv2', 'conv1', 'weight2', 'input1', 'weight1'] - self.assertEqual(kwargs['backward'], lst1, - 'backward traversal failed in breadth-first mode.') - - lst2 = ['input3', 'input2', 'input1', 'weight1', 'weight2', - 'conv4', 'conv3', 'conv1', 'conv2', 'output'] - self.assertEqual(kwargs['forward'], lst2, 'forward traversal failed in breadth-first mode.') - - self.assertEqual(runner.message, [ - 'start running.', - 'conv4: backward process', - 'conv3: backward process', - 'conv2: backward process', - 'conv1: backward process', - 'conv4: forward process', - 'conv3: forward process', - 'conv1: forward process', - 'conv2: forward process', - 'finished running.', - ]) - - print("GraphRunner bradth-first mode test passed!") - - def test_graphrunner_lazy_breadth_first(self) -> None: - """Test code for GraphRunner, with the lazy breadth-first mode.""" - graph = Graph() - self.create_graph(graph) - - kwargs: Dict[str, List[str]] = {'backward': [], 'forward': []} - runner = 
TestRunner(graph, depth_first=False, lazy=True) - runner.run(**kwargs) - - lst1 = ['output', 'conv4', 'input3', 'conv3', 'input2', 'conv2', 'conv1', 'weight2', 'input1', 'weight1'] - self.assertEqual(kwargs['backward'], lst1, - 'backward traversal failed in breadth-first mode.') - - lst2 = ['input3', 'input2', 'input1', 'weight1', 'weight2', - 'conv1', 'conv2', 'conv3', 'conv4', 'output'] - self.assertEqual(kwargs['forward'], lst2, 'forward traversal failed in breadth-first mode.') - - self.assertEqual(runner.message, [ - 'start running.', - 'conv4: backward process', - 'conv3: backward process', - 'conv2: backward process', - 'conv1: backward process', - 'conv1: forward process', - 'conv2: forward process', - 'conv3: forward process', - 'conv4: forward process', - 'finished running.', - ]) - - print("GraphRunner lazy breadth-first mode test passed!") - - def create_graph(self, graph): - - x1 = Input( - 'input1', - [1, 4, 4, 3], - Float32(), - ) - - w1 = Constant( - 'weight1', - Float32(), - np.zeros([1, 2, 2, 3]) - ) - - conv1 = Conv( - 'conv1', - [1, 3, 3, 3], - Float32(), - {'X': x1, 'W': w1}, - kernel_shape=[2, 2] - ) - - w2 = Constant( - 'weight2', - Float32(), - np.zeros([3, 2, 2, 3]) - ) - - conv2 = Conv( - 'conv2', - [1, 2, 2, 3], - Float32(), - {'X': conv1, 'W': w2}, - kernel_shape=[2, 2] - ) - - x2 = Input( - 'input2', - [3, 3, 3, 3], - Float32(), - ) - - x3 = Input( - 'input3', - [3, 3, 3, 3], - Float32(), - ) - - conv3 = Conv( - 'conv3', - [3, 2, 2, 3], - Float32(), - {'X': x2, 'W': conv2}, - kernel_shape=[2, 2] - ) - - conv4 = Conv( - 'conv4', - [1, 2, 2, 3], - Float32(), - {'X': x3, 'W': conv3}, - kernel_shape=[2, 2] - ) - - y = Output( - 'output', - [1, 2, 2, 3], - Float32(), - {'input': conv4} - ) - - # add ops to the graph - graph.add_op_and_inputs(y) - - -if __name__ == '__main__': - unittest.main() diff --git a/dlk/tests/test_optimizer.py b/dlk/tests/test_optimizer.py index 0e58d65b5..9e0d60db0 100644 --- a/dlk/tests/test_optimizer.py +++ b/dlk/tests/test_optimizer.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================= -"""Test file for GraphRunner.""" +"""Test file for Optimizer.""" import unittest from core.data_types import Float32, Uint32, Int32, QUANTIZED_NOT_PACKED from core.optimizer import pass_remove_identities, pass_transpose, pass_precompute, \ From 90a49658f21c4938641d5197259f7368b5774e1a Mon Sep 17 00:00:00 2001 From: Antonio Date: Tue, 25 Dec 2018 12:35:23 +0900 Subject: [PATCH 23/45] Added docstrings and deleted some unused code --- dlk/python/dlk/core/graph.py | 10 +- dlk/python/dlk/core/graph_pattern_matching.py | 73 ++++++++++- dlk/python/dlk/core/operators.py | 3 - dlk/python/dlk/core/optimizer.py | 118 +++++++++++++++++- dlk/python/dlk/scripts/generate_project.py | 2 +- 5 files changed, 187 insertions(+), 19 deletions(-) diff --git a/dlk/python/dlk/core/graph.py b/dlk/python/dlk/core/graph.py index 6c569b041..e6c117b1d 100644 --- a/dlk/python/dlk/core/graph.py +++ b/dlk/python/dlk/core/graph.py @@ -16,14 +16,8 @@ """Graph module.""" from collections import OrderedDict, defaultdict from typing import cast, Any, Dict, List, Optional, Set, TYPE_CHECKING -import functools - -from core.operators import Add, AveragePool, BatchNormalization, Constant, Conv, Identity, Input, \ - MaxPool, Operator, Output, Transpose, QTZ_binary_mean_scaling, QTZ_linear_mid_tread_half, Reshape, Softmax, \ - Relu, Flatten, Dropout, Gemm, SpaceToDepth, Mul, QTZ_binary_channel_wise_mean_scaling, ConcatOnDepth, Maximum, \ - DepthToSpace, Split - -from core.graph_pattern_matching import sort_graph, find_pattern, Pattern +from core.operators import Conv, Operator +from core.graph_pattern_matching import sort_graph class Graph(object): diff --git a/dlk/python/dlk/core/graph_pattern_matching.py b/dlk/python/dlk/core/graph_pattern_matching.py index 95862d260..306cb6d6b 100644 --- a/dlk/python/dlk/core/graph_pattern_matching.py +++ b/dlk/python/dlk/core/graph_pattern_matching.py @@ -17,23 +17,58 @@ class Pattern: + """Pattern is a sub-graph based on the operator types. + It is a recursive pattern where a Pattern holds a operator type and a list of inputs. + Each input in this list is also a Pattern. + """ def __init__(self, op=str(), inputs=list()): self.op = op self.inputs = inputs class NodeMatch: + """NodeMatch defines a sub-graph that match a given Pattern. + It is a recursive pattern where a NodeMatch holds a reference to the matched node and a list of inputs. + Each input in this list is also a NodeMatch. + """ def __init__(self): self.node = None self.inputs = list() def find_pattern(graph, pattern): + """Helper function that find a pattern in a graph. + + Parameters + ---------- + graph : Graph + The input graph where we will try to find the given pattern. + + pattern : Pattern + The pattern we want to look for. + + Returns + ------- + result : [NodeMatch] + A list of matches. Each element of the list is a NodeMatch. + """ gm = GraphMatcher(graph) return gm.get_op_type_matches(pattern) def sort_graph(graph): + """Helper function to topologically sort a given graph. + + Parameters + ---------- + graph : Graph + The input graph to be sorted. It is not modified. + + Returns + ------- + result : [Operator] + A list of Operator. Each element of the list is a reference to a Operator object. + """ exec_list = list() input_nodes = list() for node in graph.operators: @@ -55,6 +90,19 @@ def sort_graph(graph): def top_order(output_node, exec_list, visited): + """It topologically sorts a given graph. 
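
A minimal usage sketch for the Pattern/find_pattern helpers documented above, assuming `graph` is an already-built core.graph.Graph; the operator type names come from core.operators, and this pattern only matches convolutions with exactly two inputs (activations, then weights):

    from core.graph_pattern_matching import Pattern, find_pattern

    # a Conv whose weight input is produced by a binary weight quantizer;
    # '*' leaves the activation input unconstrained, '|' expresses alternation
    p = Pattern('Conv',
                [Pattern('*'),
                 Pattern('QTZ_binary_mean_scaling|QTZ_binary_channel_wise_mean_scaling')])
    binary_weight_convs = [m.node for m in find_pattern(graph, p)]
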
+ + Parameters + ---------- + output_node : Operator + The starting node. First one in the ordered list. + + exec_list : [Operator] + The ordered list. Note that this is an output parameter. + + visited : [str] + List of already visited nodes. + """ if visited[output_node.name]: return for input_node in output_node.input_nodes: @@ -65,6 +113,23 @@ def top_order(output_node, exec_list, visited): def get_nodes_in_branch(starting_node, stop_node, node_list): + """Helper function that gives us all nodes in a branch defined by a given node. + The starting node will be the output node of the branch. + + Note that there is an optional stop node. stop_node is allowed to be None. + + Parameters + ---------- + starting_node : Operator + The starting node. This node is the output node of the defined branch. + + stop_node : Operator + The last node in the path. If stop_node is None then this function will give us every node above + starting_node. + + node_list : [Operator] + The list of nodes contained in the branch. Note that this is an output parameter. + """ if starting_node == stop_node: return node_list.append(starting_node) @@ -73,13 +138,9 @@ def get_nodes_in_branch(starting_node, stop_node, node_list): get_nodes_in_branch(node, stop_node, node_list) -def match_to_execution_list(match, execution_list): - for input_node in match.inputs: - match_to_execution_list(input_node, execution_list) - execution_list.append(match.node) - - class GraphMatcher: + """GraphMatcher is used to find sub-graphs in the computational graph. + """ def __init__(self, input_graph): self.graph_node_list = list() self.graph_node_list = sort_graph(input_graph) diff --git a/dlk/python/dlk/core/operators.py b/dlk/python/dlk/core/operators.py index 223066470..d662d64fd 100644 --- a/dlk/python/dlk/core/operators.py +++ b/dlk/python/dlk/core/operators.py @@ -17,7 +17,6 @@ import functools import copy from itertools import dropwhile -from collections import OrderedDict from typing import cast, Any, Dict, Optional, TYPE_CHECKING from core.view import View from utils import classproperty @@ -55,8 +54,6 @@ def __init__(self, self._check_consistency() self._rank = len(shape) self._available_buffer = '' - self._visited = False - self._prop_details = Dict def __update_shape(self, shape: List[int], dimension_format: str) -> None: self._shape: List[int] = shape diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index 77e08312c..ca8d9a2b5 100644 --- a/dlk/python/dlk/core/optimizer.py +++ b/dlk/python/dlk/core/optimizer.py @@ -27,7 +27,17 @@ def pass_dot_graph(graph: Graph, filename) -> None: + """Generate a GraphViz dot file from a given Graph. + Parameters + ---------- + graph : Graph + The input graph to be converted into a dot script + + filename : str + The file where we want to save the dot script + + """ dot_script = 'digraph {' code = {} @@ -55,6 +65,14 @@ def pass_dot_graph(graph: Graph, filename) -> None: def pass_remove_identities(graph: Graph) -> None: + """Removes those nodes of a Graph that satisfies the condition node.op_type() == Identity. + + Parameters + ---------- + graph : Graph + The input graph. It will be modified in-place. + + """ p = Pattern("Identity") matches = find_pattern(graph, p) to_be_removed = list() @@ -83,6 +101,18 @@ def pass_remove_identities(graph: Graph) -> None: def pass_transpose(graph: Graph) -> None: + """Changes the data order of every node to be NHWC. + The fastest changing dimension is C + N stands for batch size (on inference we assume is 1. 
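
As a small concrete check of the permutation this pass computes (after 'I'/'O' have been rewritten to 'C'/'N'), an NCHW-formatted node is reordered with:

    dim = 'NCHW'
    permutation = [dim.index(s) for s in 'NHWC']   # -> [0, 2, 3, 1]
    # node.transpose(permutation) then moves channels to the last axis
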
+ H and W are the height and width respectively. + C stands for depth (aka channels) + + Parameters + ---------- + graph : Graph + The input graph. It will be modified in-place. + + """ p = Pattern("*") matches = find_pattern(graph, p) @@ -100,6 +130,21 @@ def pass_transpose(graph: Graph) -> None: def pass_precompute(graph: Graph, processed_nodes) -> bool: + """Given a node N, if the value of each input of N is known at compilation time then N will be executed. + The node N and its inputs will be replaced with a Constant node which holds the computed output of N. + + Parameters + ---------- + graph : Graph + The input graph. It will be modified in-place. + processed_nodes : list + The list of the processed nodes so far. + + Returns + ------- + result : bool + True if some nodes were precomputed, False otherwise. + """ p = Pattern('*') matches = find_pattern(graph, p) processed_before_precompute = len(processed_nodes) @@ -152,6 +197,25 @@ def pass_precompute(graph: Graph, processed_nodes) -> bool: def pass_propagate_quantization_details_into_conv(graph: Graph) -> None: + """Given a node N, it will propagate information about quantization into the convolution nodes. + + There are two types of nodes. Those which preserve quantization (for example, Space2Depth because + does not affect the actual values of the input data, only changes it positions) and those which + destroy quantization (for example, BatchNormalization, because it involves float operations). + + If there is path in the Graph which connect a Quantizer node Q to a Conv node C and every node between + Q and C preserve quantization (for example, Q -> Space2Depth -> Concat > Conv) then the details about the + quantizer Q should be propagated into the convolution node C. + + This pass allows us to further process the convolution nodes later and maybe quantize these convolutions + based on these quantization details. Note that a convolution node has two inputs, input data and weights. + We propagate quantization details through both the input node branch and the weight node branch. + + Parameters + ---------- + graph : Graph + The input graph. It will be modified in-place. + """ p = Pattern('*') matches = find_pattern(graph, p) qtypes = [ @@ -188,11 +252,25 @@ def pass_propagate_quantization_details_into_conv(graph: Graph) -> None: def pass_compute_thresholds(graph: Graph) -> None: + """Given a Quantizer node Q: + - if there is a backward path between Q and a convolution node and, + - every node N of that path satisfies the condition N.is_monotonic and, + - the convolution node C (the end of this path) is a quantized convolution + then this pass construct an LUT per channel which maps a possible output value of the quantized convolution node + C to the corresponding output of the quantization node Q. This effectively compress the path C -> ... -> Q + into a list of LUTs that can be used during inference. + + Parameters + ---------- + graph : Graph + The input graph. It will be modified in-place. + """ p = Pattern('QTZ_linear_mid_tread_half') matches = find_pattern(graph, p) to_be_removed = [] for m in matches: + # find a a backward path between the quantizer and the convolution ie. 
a path represented by a list [Q, ..., C] p = [m.node] while p[-1].op_type != 'Conv': non_variable_input = [inode for inode in p[-1].input_nodes @@ -240,7 +318,7 @@ def pass_compute_thresholds(graph: Graph) -> None: for th_id, th_v in enumerate(th_val): init_threshold = np.full(ch, th_v, dtype=np.float64) - # run calculation in reverse order: q -> bn -> scaling + # run calculation in reverse order, for example, q -> bn -> scaling trans_th = {'data': init_threshold} for op in p[:-1]: trans_th = op.de_run(**trans_th) @@ -288,6 +366,15 @@ def pass_compute_thresholds(graph: Graph) -> None: def pass_pack_weights(graph: Graph) -> None: + """Given a Quantized convolution node C, it will pack the weights of C into 32 bit words. + If the node Q that apply quantization to the weights of C quantizes, for example, into 1 bit values + then one 32 bit word will contain 32 weights. + + Parameters + ---------- + graph : Graph + The input graph. It will be modified in-place. + """ p = Pattern('Conv') matches = find_pattern(graph, p) quantization_types = [ @@ -345,6 +432,15 @@ def pass_pack_weights(graph: Graph) -> None: def pass_quantize_convolutions(graph: Graph) -> None: + """Given a convolution node C, if C has proper quantization details, it will mark C as quantized and it will + assign the correct output data types to the node C and its quantizers. Note that the expected output data type + on the runtime is defined as QUANTIZED_NOT_PACKED. + + Parameters + ---------- + graph : Graph + The input graph. It will be modified in-place. + """ p = Pattern('Conv') matches = find_pattern(graph, p) @@ -369,6 +465,13 @@ def pass_quantize_convolutions(graph: Graph) -> None: def pass_propagate_datatypes(graph) -> None: + """Further propagate output data types. + + Parameters + ---------- + graph : Graph + The input graph. It will be modified in-place. + """ p = Pattern('*') matches = find_pattern(graph, p) @@ -378,6 +481,19 @@ def pass_propagate_datatypes(graph) -> None: def pass_propagate_output_type_backward(graph: Graph) -> None: + """It is assumed that the output data type of a Graph is float. + We should propagate this assumption backwards from the output node of the graph to the + latest quantized convolution available. + + There could be cases where the latest convolution node Q is a quantized convolution and we also apply + thresholds to its outputs. In this cases, the quantized convolution output data type should be float + even if thresholds are applied. + + Parameters + ---------- + graph : Graph + The input graph. It will be modified in-place. + """ p = Pattern('*') matches = find_pattern(graph, p) diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index 8bd0a2d63..ef3d21573 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -39,7 +39,7 @@ def optimize_graph_step(model: Model, config: Config) -> None: - """Optimze graph in the model. + """Optimize graph in the model. 
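
To make the threshold tables built by pass_compute_thresholds above concrete, a toy illustration with made-up numbers (the per-channel sign handling and the clamping of oversized thresholds are left out): a 2-bit activation quantizer needs 2**2 - 1 = 3 thresholds per channel, and the quantized output is how many of them the raw convolution accumulator crosses.

    import numpy as np

    thresholds = np.array([10, 25, 40])        # hypothetical thresholds, one channel
    accumulators = np.array([3, 12, 26, 55])   # hypothetical convolution outputs
    quantized = (accumulators[:, None] >= thresholds[None, :]).sum(axis=1)
    print(quantized)                           # [0 1 2 3]
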
Parameters ---------- From ecf57f6a1cba02dca945e28caaef47222a6ef0a7 Mon Sep 17 00:00:00 2001 From: Antonio Date: Tue, 25 Dec 2018 13:50:31 +0900 Subject: [PATCH 24/45] Fix PEP8 --- dlk/python/dlk/core/graph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dlk/python/dlk/core/graph.py b/dlk/python/dlk/core/graph.py index e6c117b1d..c09bcdf0d 100644 --- a/dlk/python/dlk/core/graph.py +++ b/dlk/python/dlk/core/graph.py @@ -16,7 +16,7 @@ """Graph module.""" from collections import OrderedDict, defaultdict from typing import cast, Any, Dict, List, Optional, Set, TYPE_CHECKING -from core.operators import Conv, Operator +from core.operators import Conv, Operator from core.graph_pattern_matching import sort_graph From b284dabaaf8bad4978b963b4b4c644a4657b1a0a Mon Sep 17 00:00:00 2001 From: nlpng Date: Tue, 8 Jan 2019 11:24:20 +0900 Subject: [PATCH 25/45] Fix comment typo --- dlk/python/dlk/core/optimizer.py | 2 +- dlk/python/dlk/templates/Makefile.tpl | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index ca8d9a2b5..6d59e6554 100644 --- a/dlk/python/dlk/core/optimizer.py +++ b/dlk/python/dlk/core/optimizer.py @@ -334,7 +334,7 @@ def pass_compute_thresholds(graph: Graph) -> None: if (scaling_factor < 0) ^ (ch_id in trans_th['nega_idx']) \ else int(math.ceil(th_per_ch)) - # take care of threshold values that are larger than 16-bit signed integer + # take care of threshold values that are larger than 13-bit signed integer threshold_table[abs(threshold_table) > max_th_value] = max_th_value for c in range(ch): diff --git a/dlk/python/dlk/templates/Makefile.tpl b/dlk/python/dlk/templates/Makefile.tpl index b8c417f0d..14d5a2b8b 100644 --- a/dlk/python/dlk/templates/Makefile.tpl +++ b/dlk/python/dlk/templates/Makefile.tpl @@ -134,17 +134,17 @@ clean: -$(RM) $(OBJ) lm_x86: CXX = g++ -lm_x86: FLAGS += $(INCLUDES) -O3 -std=c++0x -g -DUSE_PNG -pthread -g -DFUNC_TIME_MEASUREMENT +lm_x86: FLAGS += $(INCLUDES) -O3 -std=c++0x -g -DUSE_PNG -pthread -g lm_aarch64: CXX = aarch64-linux-gnu-g++ -lm_aarch64: FLAGS += $(INCLUDES) -O3 -std=c++0x -g -DUSE_NEON -DUSE_PNG -pthread -g -DFUNC_TIME_MEASUREMENT +lm_aarch64: FLAGS += $(INCLUDES) -O3 -std=c++0x -g -DUSE_NEON -DUSE_PNG -pthread -g lm_arm: CXX = arm-linux-gnueabihf-g++ -lm_arm: FLAGS += $(INCLUDES) -std=c++0x -O3 -DUSE_NEON -DUSE_PNG -mcpu=cortex-a9 -mfpu=neon -mthumb -s -pthread -g -fopenmp -DFUNC_TIME_MEASUREMENT +lm_arm: FLAGS += $(INCLUDES) -std=c++0x -O3 -DUSE_NEON -DUSE_PNG -mcpu=cortex-a9 -mfpu=neon -mthumb -s -pthread -g -fopenmp lm_arm: CXXFLAGS += lm_fpga: CXX = arm-linux-gnueabihf-g++ -lm_fpga: FLAGS += $(INCLUDES) -std=c++0x -O3 -DUSE_NEON -DRUN_ON_FPGA -DUSE_PNG -mcpu=cortex-a9 -mfpu=neon -mthumb -s -pthread -g -fopenmp -DFUNC_TIME_MEASUREMENT +lm_fpga: FLAGS += $(INCLUDES) -std=c++0x -O3 -DUSE_NEON -DRUN_ON_FPGA -DUSE_PNG -mcpu=cortex-a9 -mfpu=neon -mthumb -s -pthread -g -fopenmp lm_fpga: CXXFLAGS += lib_x86: CXX = g++ From 9f01c686ddc66a1d49742781f98b6f1583d0c685 Mon Sep 17 00:00:00 2001 From: nlpng Date: Tue, 8 Jan 2019 11:28:50 +0900 Subject: [PATCH 26/45] remove redundant method and flags --- dlk/python/dlk/core/optimizer.py | 38 ---------------------- dlk/python/dlk/scripts/generate_project.py | 2 +- dlk/tests/test_optimizer.py | 2 +- 3 files changed, 2 insertions(+), 40 deletions(-) diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index 6d59e6554..0be25edf2 100644 --- a/dlk/python/dlk/core/optimizer.py 
+++ b/dlk/python/dlk/core/optimizer.py @@ -26,44 +26,6 @@ from modules.packer import Packer -def pass_dot_graph(graph: Graph, filename) -> None: - """Generate a GraphViz dot file from a given Graph. - - Parameters - ---------- - graph : Graph - The input graph to be converted into a dot script - - filename : str - The file where we want to save the dot script - - """ - dot_script = 'digraph {' - - code = {} - counter = 0 - for node in graph.operators: - code[node.name] = counter - counter += 1 - - for node in graph.operators: - - shape = '-' - if node.shape: - shape = 'x'.join(str(x) for x in node.shape) - shape += '(' + node.dimension + ')' - - dot_script += node.name + '[label=" ' + format(code[node.name], '04X') + '| ' + \ - node.op_type + '| ' + shape + '| ' + node.dtype.cpptype() + '" shape = "record"];' - for i in node.input_nodes: - dot_script += i.name + ' -> ' + node.name + ';' - - dot_script += '}' - - with open(filename, 'w') as f: - f.write(dot_script) - - def pass_remove_identities(graph: Graph) -> None: """Removes those nodes of a Graph that satisfies the condition node.op_type() == Identity. diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index ef3d21573..c19d3933b 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -31,7 +31,7 @@ from frontend import TensorFlowIO from core.optimizer import pass_remove_identities, pass_transpose, pass_precompute, \ pass_propagate_quantization_details_into_conv, pass_compute_thresholds, pass_pack_weights, \ - pass_quantize_convolutions, pass_propagate_datatypes, pass_propagate_output_type_backward, pass_dot_graph + pass_quantize_convolutions, pass_propagate_datatypes, pass_propagate_output_type_backward SCRITPS_DIR = path.abspath(path.dirname(__file__)) DLK_ROOT_DIR = path.abspath(path.join(SCRITPS_DIR, '..')) diff --git a/dlk/tests/test_optimizer.py b/dlk/tests/test_optimizer.py index 9e0d60db0..88cbfbeaa 100644 --- a/dlk/tests/test_optimizer.py +++ b/dlk/tests/test_optimizer.py @@ -18,7 +18,7 @@ from core.data_types import Float32, Uint32, Int32, QUANTIZED_NOT_PACKED from core.optimizer import pass_remove_identities, pass_transpose, pass_precompute, \ pass_propagate_quantization_details_into_conv, pass_compute_thresholds, pass_pack_weights, \ - pass_quantize_convolutions, pass_propagate_datatypes, pass_propagate_output_type_backward, pass_dot_graph + pass_quantize_convolutions, pass_propagate_datatypes, pass_propagate_output_type_backward from core.graph import Graph from core.operators import Add, AveragePool, BatchNormalization, Constant, Conv, Identity, Input, \ MaxPool, Operator, Output, Transpose, QTZ_binary_mean_scaling, QTZ_linear_mid_tread_half, Reshape, Softmax From 14cf8cc73701b7c1382f910da9783765f31dfec1 Mon Sep 17 00:00:00 2001 From: nlpng Date: Tue, 8 Jan 2019 13:42:38 +0900 Subject: [PATCH 27/45] Implement preserve_quantization in each operator --- dlk/python/dlk/core/operators.py | 48 +++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/dlk/python/dlk/core/operators.py b/dlk/python/dlk/core/operators.py index d662d64fd..de6f905d2 100644 --- a/dlk/python/dlk/core/operators.py +++ b/dlk/python/dlk/core/operators.py @@ -485,7 +485,9 @@ def infer_shape(cls, lists: Dict[str, List[int]], format: str, input_formats: Li @property def preserve_quantization(self) -> bool: - return False + """whether to preserve the operator for quantization""" + raise NotImplementedError( + f'Preservation 
for quantization of operator {self.op_type} is not defined.') class Variable(Operator): @@ -525,6 +527,10 @@ def data(self) -> np.ndarray: def data(self, val: np.ndarray) -> None: self._data = val + @property + def preserve_quantization(self) -> bool: + return False + class Input(Variable): """Input class. This is a placeholder.""" @@ -676,6 +682,10 @@ def max_v(self) -> float: def scaling_factor(self) -> np.float32: return self._scaling_factor + @property + def preserve_quantization(self) -> bool: + return False + @scaling_factor.setter def scaling_factor(self, val: np.float32) -> None: self._scaling_factor = val @@ -1324,6 +1334,10 @@ def infer_shape(cls, lists: Dict[str, List[int]], format: str, input_formats: Li def _dispatch_name(self) -> str: return "batch_normalization" + @property + def preserve_quantization(self) -> bool: + return False + class QTZ_linear_mid_tread_half(Quantizer): """Quantization operator with 'linear mid tread half'. @@ -1506,6 +1520,10 @@ def infer_shape(cls, lists: Dict[str, List[int]], format: str, input_formats: Li return output_shape + @property + def preserve_quantization(self) -> bool: + return False + class Pool(Operator): """Pooling operator. @@ -1634,6 +1652,10 @@ def infer_shape(cls, lists: Dict[str, List[int]], format: str, input_formats: Li perm = [format.index(s) for s in 'NCHW'] return [NCHW[i] for i in perm] + @property + def preserve_quantization(self) -> bool: + return False + class MaxPool(Pool): """Max pooling operator. @@ -1852,6 +1874,10 @@ def run_forward(self) -> np.ndarray: self._data = exp / np.expand_dims(exp.sum(axis=-1), -1) return self._data + @property + def preserve_quantization(self) -> bool: + return False + class Relu(Operator): """Relu class. @@ -1887,6 +1913,10 @@ def infer_shape(cls, lists: Dict[str, List[int]], format: str, input_formats: Li attrs: Dict[str, Any]) -> List[int]: return lists['X'] + @property + def preserve_quantization(self) -> bool: + return False + class Flatten(Operator): """Flatten class. @@ -2010,6 +2040,10 @@ def infer_shape(cls, lists: Dict[str, List[int]], format: str, input_formats: Li attrs: Dict[str, Any]) -> List[int]: return lists['data'] + @property + def preserve_quantization(self) -> bool: + return False + class Gemm(Operator): """Gemm operator. @@ -2096,6 +2130,10 @@ def infer_shape(cls, lists: Dict[str, List[int]], format: str, input_formats: Li return [M, N] + @property + def preserve_quantization(self) -> bool: + return False + class Mul(Operator): """Mul operator. @@ -2171,6 +2209,10 @@ def run_forward(self) -> np.ndarray: def is_monotonic(self) -> bool: return False + @property + def preserve_quantization(self) -> bool: + return False + @classmethod def infer_shape(cls, lists: Dict[str, List[int]], format: str, input_formats: List[str], attrs: Dict[str, Any]) -> List[int]: @@ -2369,6 +2411,10 @@ def _dispatch_name(self) -> str: def is_monotonic(self) -> bool: return False + @property + def preserve_quantization(self) -> bool: + return False + class DepthToSpace(Operator): """Depth to Space operator. 
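
Since the base-class property now raises NotImplementedError, any operator added later has to state explicitly whether quantized values survive it. A minimal sketch for a hypothetical operator (the class name is illustrative, not one of the operators in this patch set):

    from core.operators import Operator

    class ChannelShuffle(Operator):
        """Hypothetical operator that only re-orders data."""

        @property
        def preserve_quantization(self) -> bool:
            # pure data re-ordering keeps quantized values intact
            return True
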
From 74da1fd8ca9a3214d7b8b6c9c7a97bfbd8bc5fd8 Mon Sep 17 00:00:00 2001 From: nlpng Date: Tue, 8 Jan 2019 13:45:16 +0900 Subject: [PATCH 28/45] Move nega_idx aggregation into optimization pass --- dlk/python/dlk/core/operators.py | 2 -- dlk/python/dlk/core/optimizer.py | 8 ++++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/dlk/python/dlk/core/operators.py b/dlk/python/dlk/core/operators.py index de6f905d2..f4cd00b39 100644 --- a/dlk/python/dlk/core/operators.py +++ b/dlk/python/dlk/core/operators.py @@ -1294,8 +1294,6 @@ def run(self, **kwargs) -> Dict: mean = np.float64(self._input_ops['mean'].data) var = np.float64(self._input_ops['var'].data) - kwargs['nega_idx'] = [v for v in range(len(scale)) if scale[v] < 0] - x_norm = (kwargs['data'] - mean) / np.sqrt(var + self.epsilon) kwargs['data'] = scale * x_norm + beta return kwargs diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index 0be25edf2..642687981 100644 --- a/dlk/python/dlk/core/optimizer.py +++ b/dlk/python/dlk/core/optimizer.py @@ -281,19 +281,23 @@ def pass_compute_thresholds(graph: Graph) -> None: init_threshold = np.full(ch, th_v, dtype=np.float64) # run calculation in reverse order, for example, q -> bn -> scaling + bn_nega_idx = [] trans_th = {'data': init_threshold} for op in p[:-1]: trans_th = op.de_run(**trans_th) + if op.op_type == 'BatchNormalization': + bn_scale = op.input_ops['scale'].data + bn_nega_idx = [v for v in range(len(bn_scale)) if bn_scale[v] < 0] threshold = (trans_th['data'] * np.float64(n)) / (np.float64(max_v) * scaling_factor) for ch_id, th_per_ch in enumerate(threshold): if quantizer_conv_weights.op_type == 'QTZ_binary_channel_wise_mean_scaling': threshold_table[ch_id, th_id] = int(math.floor(th_per_ch)) \ - if (scaling_factor[ch_id] < 0) ^ (ch_id in trans_th['nega_idx']) \ + if (scaling_factor[ch_id] < 0) ^ (ch_id in bn_nega_idx) \ else int(math.ceil(th_per_ch)) else: threshold_table[ch_id, th_id] = int(math.floor(th_per_ch)) \ - if (scaling_factor < 0) ^ (ch_id in trans_th['nega_idx']) \ + if (scaling_factor < 0) ^ (ch_id in bn_nega_idx) \ else int(math.ceil(th_per_ch)) # take care of threshold values that are larger than 13-bit signed integer From 8417067ba6c88bcc12de6f58bed73fe1e4fa7696 Mon Sep 17 00:00:00 2001 From: Nikolay Nez Date: Tue, 8 Jan 2019 15:25:19 +0900 Subject: [PATCH 29/45] Move while loop inside of pass_precompute --- dlk/python/dlk/core/optimizer.py | 103 ++++++++++----------- dlk/python/dlk/scripts/generate_project.py | 4 +- dlk/tests/test_optimizer.py | 12 +-- 3 files changed, 55 insertions(+), 64 deletions(-) diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index 642687981..76a5e704d 100644 --- a/dlk/python/dlk/core/optimizer.py +++ b/dlk/python/dlk/core/optimizer.py @@ -91,7 +91,7 @@ def pass_transpose(graph: Graph) -> None: m.node.transpose(permutation) -def pass_precompute(graph: Graph, processed_nodes) -> bool: +def pass_precompute(graph: Graph) -> None: """Given a node N, if the value of each input of N is known at compilation time then N will be executed. The node N and its inputs will be replaced with a Constant node which holds the computed output of N. @@ -101,61 +101,60 @@ def pass_precompute(graph: Graph, processed_nodes) -> bool: The input graph. It will be modified in-place. processed_nodes : list The list of the processed nodes so far. - - Returns - ------- - result : bool - True if some nodes were precomputed, False otherwise. 
""" - p = Pattern('*') - matches = find_pattern(graph, p) - processed_before_precompute = len(processed_nodes) - to_be_removed = [] - - for m in matches: - if m.node in processed_nodes: - continue - - # We want operators with inputs - if not m.node.input_nodes: - continue - - precomputable = True - for input_node in m.node.input_nodes: - if input_node.op_type != 'Constant': - precomputable = False - - if not precomputable: - continue - - processed_nodes += m.node.input_nodes - processed_nodes.append(m.node) - data = m.node.run_forward() - - new_constant = Constant( - m.node.name + '_new', - m.node.dtype, - data, - dimension_format=m.node.dimension - ) - graph.add_op(new_constant) - - # get nodes to be removed after being disconnected - get_nodes_in_branch(m.node, None, to_be_removed) - - new_constant.add_outputs({'output': m.node.output_ops.values()}) - for output_name, consumer_list in m.node.output_ops.items(): - for consumer_node in consumer_list: - for input_name, input_node in consumer_node.input_ops.items(): - if input_node == m.node: - consumer_node.add_input(input_name, new_constant) - break + done = False + processed_nodes = [] + while not done: + p = Pattern('*') + matches = find_pattern(graph, p) + processed_before_precompute = len(processed_nodes) + to_be_removed = [] + + for m in matches: + if m.node in processed_nodes: + continue + + # We want operators with inputs + if not m.node.input_nodes: + continue + + precomputable = True + for input_node in m.node.input_nodes: + if input_node.op_type != 'Constant': + precomputable = False + + if not precomputable: + continue + + processed_nodes += m.node.input_nodes + processed_nodes.append(m.node) + + data = m.node.run_forward() + + new_constant = Constant( + m.node.name + '_new', + m.node.dtype, + data, + dimension_format=m.node.dimension + ) + graph.add_op(new_constant) + + # get nodes to be removed after being disconnected + get_nodes_in_branch(m.node, None, to_be_removed) + + new_constant.add_outputs({'output': m.node.output_ops.values()}) + for output_name, consumer_list in m.node.output_ops.items(): + for consumer_node in consumer_list: + for input_name, input_node in consumer_node.input_ops.items(): + if input_node == m.node: + consumer_node.add_input(input_name, new_constant) + break - for op in to_be_removed: - graph.remove_op(op) + for op in to_be_removed: + graph.remove_op(op) - return len(processed_nodes) > processed_before_precompute + done = len(processed_nodes) == processed_before_precompute def pass_propagate_quantization_details_into_conv(graph: Graph) -> None: diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index c19d3933b..74b7f0d54 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -65,9 +65,7 @@ def optimize_graph_step(model: Model, config: Config) -> None: pass_propagate_output_type_backward(graph) pass_propagate_datatypes(graph) - processed_nodes = [] - while pass_precompute(graph, processed_nodes=processed_nodes): - pass + pass_precompute(graph) def generate_code_step(model: Model, config: Config) -> None: diff --git a/dlk/tests/test_optimizer.py b/dlk/tests/test_optimizer.py index 88cbfbeaa..dc46a8527 100644 --- a/dlk/tests/test_optimizer.py +++ b/dlk/tests/test_optimizer.py @@ -41,9 +41,7 @@ def test_precompute1(self) -> None: pass_remove_identities(graph1) pass_transpose(graph1) - processed_nodes = [] - while pass_precompute(graph1, processed_nodes=processed_nodes): - pass + pass_precompute(graph1) 
self.assertEqual(graph1, graph2, 'precompute failed.') @@ -66,9 +64,7 @@ def test_precompute2(self) -> None: pass_propagate_datatypes(graph1) - processed_nodes = [] - while pass_precompute(graph1, processed_nodes=processed_nodes): - pass + pass_precompute(graph1) self.assertEqual(graph1, graph2, 'precompute failed.') self.assertAlmostEqual(graph1.get_op('conv2').quantizer.scaling_factor, scaling2) # type: ignore @@ -92,9 +88,7 @@ def test_precompute3(self) -> None: pass_propagate_datatypes(graph1) - processed_nodes = [] - while pass_precompute(graph1, processed_nodes=processed_nodes): - pass + pass_precompute(graph1) self.assertEqual(graph1, graph2, 'precompute failed.') self.assertAlmostEqual(graph1.get_op('conv2').quantizer.scaling_factor, scaling2) # type: ignore From a0d4a33ebbe7f22898ebfee98199092eee2edac8 Mon Sep 17 00:00:00 2001 From: Nikolay Nez Date: Tue, 8 Jan 2019 15:27:46 +0900 Subject: [PATCH 30/45] Rename pass_precompute to pass_constant_folding --- dlk/python/dlk/core/optimizer.py | 2 +- dlk/python/dlk/scripts/generate_project.py | 4 ++-- dlk/tests/test_optimizer.py | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index 76a5e704d..9de82dfdd 100644 --- a/dlk/python/dlk/core/optimizer.py +++ b/dlk/python/dlk/core/optimizer.py @@ -91,7 +91,7 @@ def pass_transpose(graph: Graph) -> None: m.node.transpose(permutation) -def pass_precompute(graph: Graph) -> None: +def pass_constant_folding(graph: Graph) -> None: """Given a node N, if the value of each input of N is known at compilation time then N will be executed. The node N and its inputs will be replaced with a Constant node which holds the computed output of N. diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index 74b7f0d54..b37982dd2 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -29,7 +29,7 @@ from core.params import Params from code_generater import CodeGenerater from frontend import TensorFlowIO -from core.optimizer import pass_remove_identities, pass_transpose, pass_precompute, \ +from core.optimizer import pass_remove_identities, pass_transpose, pass_constant_folding, \ pass_propagate_quantization_details_into_conv, pass_compute_thresholds, pass_pack_weights, \ pass_quantize_convolutions, pass_propagate_datatypes, pass_propagate_output_type_backward @@ -65,7 +65,7 @@ def optimize_graph_step(model: Model, config: Config) -> None: pass_propagate_output_type_backward(graph) pass_propagate_datatypes(graph) - pass_precompute(graph) + pass_constant_folding(graph) def generate_code_step(model: Model, config: Config) -> None: diff --git a/dlk/tests/test_optimizer.py b/dlk/tests/test_optimizer.py index dc46a8527..e05015346 100644 --- a/dlk/tests/test_optimizer.py +++ b/dlk/tests/test_optimizer.py @@ -16,7 +16,7 @@ """Test file for Optimizer.""" import unittest from core.data_types import Float32, Uint32, Int32, QUANTIZED_NOT_PACKED -from core.optimizer import pass_remove_identities, pass_transpose, pass_precompute, \ +from core.optimizer import pass_remove_identities, pass_transpose, pass_constant_folding, \ pass_propagate_quantization_details_into_conv, pass_compute_thresholds, pass_pack_weights, \ pass_quantize_convolutions, pass_propagate_datatypes, pass_propagate_output_type_backward from core.graph import Graph @@ -41,7 +41,7 @@ def test_precompute1(self) -> None: pass_remove_identities(graph1) pass_transpose(graph1) - 
pass_precompute(graph1) + pass_constant_folding(graph1) self.assertEqual(graph1, graph2, 'precompute failed.') @@ -64,7 +64,7 @@ def test_precompute2(self) -> None: pass_propagate_datatypes(graph1) - pass_precompute(graph1) + pass_constant_folding(graph1) self.assertEqual(graph1, graph2, 'precompute failed.') self.assertAlmostEqual(graph1.get_op('conv2').quantizer.scaling_factor, scaling2) # type: ignore @@ -88,7 +88,7 @@ def test_precompute3(self) -> None: pass_propagate_datatypes(graph1) - pass_precompute(graph1) + pass_constant_folding(graph1) self.assertEqual(graph1, graph2, 'precompute failed.') self.assertAlmostEqual(graph1.get_op('conv2').quantizer.scaling_factor, scaling2) # type: ignore From 2a20dba37b6d289e93fdcf2a30ca61cdd541fdb8 Mon Sep 17 00:00:00 2001 From: nlpng Date: Tue, 8 Jan 2019 15:32:21 +0900 Subject: [PATCH 31/45] rename find_input to output_dtype_changer --- dlk/python/dlk/core/optimizer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index 9de82dfdd..b4416358a 100644 --- a/dlk/python/dlk/core/optimizer.py +++ b/dlk/python/dlk/core/optimizer.py @@ -462,14 +462,14 @@ def pass_propagate_output_type_backward(graph: Graph) -> None: p = Pattern('*') matches = find_pattern(graph, p) - def find_input(node, otype): + def output_dtype_changer(node, otype): for n in node.input_nodes: if n.op_type == 'Conv' and n.is_quantized: n.dtype = otype return - find_input(n, otype) + output_dtype_changer(n, otype) # propagate output data type to the last quantized convolution output_node = matches[-1].node output_type = output_node.dtype - find_input(output_node, output_type) + output_dtype_changer(output_node, output_type) From fb51eee0a037d619c5874b32bc0af05396b39a78 Mon Sep 17 00:00:00 2001 From: nlpng Date: Fri, 11 Jan 2019 10:02:50 +0900 Subject: [PATCH 32/45] remove some redundant lines --- dlk/python/dlk/core/operators.py | 2 -- dlk/python/dlk/core/optimizer.py | 8 ++++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/dlk/python/dlk/core/operators.py b/dlk/python/dlk/core/operators.py index f4cd00b39..e54090d4f 100644 --- a/dlk/python/dlk/core/operators.py +++ b/dlk/python/dlk/core/operators.py @@ -1304,8 +1304,6 @@ def de_run(self, **kwargs) -> Dict: mean = np.float64(self._input_ops['mean'].data) var = np.float64(self._input_ops['var'].data) - kwargs['nega_idx'] = [v for v in range(len(scale)) if scale[v] < 0] - kwargs['data'] = (((kwargs['data'] - beta) / scale) * np.sqrt(var + self.epsilon)) + mean return kwargs diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index b4416358a..0f133aa21 100644 --- a/dlk/python/dlk/core/optimizer.py +++ b/dlk/python/dlk/core/optimizer.py @@ -243,7 +243,7 @@ def pass_compute_thresholds(graph: Graph) -> None: if p[-1].op_type != 'Conv': continue - quantizer_conv_output_node = p[0] + activation_quantizer_node = p[0] conv_node = p[-1] # check if this is a quantized convolution @@ -313,13 +313,13 @@ def pass_compute_thresholds(graph: Graph) -> None: conv_node.thresholds = threshold_table.flatten().tolist() # get nodes to be removed after being disconnected - get_nodes_in_branch(quantizer_conv_output_node, conv_node, to_be_removed) + get_nodes_in_branch(activation_quantizer_node, conv_node, to_be_removed) # Disconnect the outputs of the quantizer - out_ops = quantizer_conv_output_node.output_ops['output'] + out_ops = activation_quantizer_node.output_ops['output'] for output_node in out_ops: for input_name, 
input_node in output_node.input_ops.items(): - if input_node == quantizer_conv_output_node: + if input_node == activation_quantizer_node: output_node.add_input(input_name, conv_node) # Disconnect the outputs of the conv From 8dd88be40c2bdad4e7aedc77449b89c68cf86d7e Mon Sep 17 00:00:00 2001 From: Antonio Date: Fri, 11 Jan 2019 16:11:40 +0900 Subject: [PATCH 33/45] Simplify the passes code --- dlk/python/dlk/core/graph_pattern_matching.py | 101 +------------- dlk/python/dlk/core/optimizer.py | 125 ++++++++---------- 2 files changed, 57 insertions(+), 169 deletions(-) diff --git a/dlk/python/dlk/core/graph_pattern_matching.py b/dlk/python/dlk/core/graph_pattern_matching.py index 306cb6d6b..6eb328746 100644 --- a/dlk/python/dlk/core/graph_pattern_matching.py +++ b/dlk/python/dlk/core/graph_pattern_matching.py @@ -13,47 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================= -"""Graph pattern matching module.""" - - -class Pattern: - """Pattern is a sub-graph based on the operator types. - It is a recursive pattern where a Pattern holds a operator type and a list of inputs. - Each input in this list is also a Pattern. - """ - def __init__(self, op=str(), inputs=list()): - self.op = op - self.inputs = inputs - - -class NodeMatch: - """NodeMatch defines a sub-graph that match a given Pattern. - It is a recursive pattern where a NodeMatch holds a reference to the matched node and a list of inputs. - Each input in this list is also a NodeMatch. - """ - def __init__(self): - self.node = None - self.inputs = list() - - -def find_pattern(graph, pattern): - """Helper function that find a pattern in a graph. - - Parameters - ---------- - graph : Graph - The input graph where we will try to find the given pattern. - - pattern : Pattern - The pattern we want to look for. - - Returns - ------- - result : [NodeMatch] - A list of matches. Each element of the list is a NodeMatch. - """ - gm = GraphMatcher(graph) - return gm.get_op_type_matches(pattern) +"""Graph sorting helper functions.""" def sort_graph(graph): @@ -136,62 +96,3 @@ def get_nodes_in_branch(starting_node, stop_node, node_list): for node in starting_node.input_nodes: get_nodes_in_branch(node, stop_node, node_list) - - -class GraphMatcher: - """GraphMatcher is used to find sub-graphs in the computational graph. 
- """ - def __init__(self, input_graph): - self.graph_node_list = list() - self.graph_node_list = sort_graph(input_graph) - - self._node_map = {node.name: node for node in self.graph_node_list} - - def record_matched_nodes(self, match, matched_nodes): - matched_nodes.add(match.node.name) - for input_node in match.inputs: - self.record_matched_nodes(input_node, matched_nodes) - - def get_op_type_matches(self, pattern): - matches = list() - matched_nodes = set() - for node in self.graph_node_list: - if node in matched_nodes: - continue - - match = NodeMatch() - if self.does_op_type_match(node, pattern, matched_nodes, match): - self.record_matched_nodes(match, matched_nodes) - matches.append(match) - return matches - - def does_op_type_match(self, node, pattern, previously_matched_nodes, match): - if node.name in previously_matched_nodes: - return False - - pattern_matched = False - if pattern.op == '*': - pattern_matched = True - else: - for pattern_op in pattern.op.split('|'): - if node.op_type == pattern_op: - pattern_matched = True - if not pattern_matched: - return False - - match.node = node - if not pattern.inputs: - return True - if len(node.input_nodes) != len(pattern.inputs): - return False - - for i in range(len(pattern.inputs)): - input_node = self._node_map[node.input_nodes[i].name] - input_pattern = pattern.inputs[i] - input_match = NodeMatch() - match.inputs.append(input_match) - - if not self.does_op_type_match(input_node, input_pattern, previously_matched_nodes, input_match): - return False - - return True diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index 0f133aa21..7b4484783 100644 --- a/dlk/python/dlk/core/optimizer.py +++ b/dlk/python/dlk/core/optimizer.py @@ -18,7 +18,7 @@ import numpy as np from core.graph import Graph -from core.graph_pattern_matching import find_pattern, Pattern, get_nodes_in_branch +from core.graph_pattern_matching import get_nodes_in_branch, sort_graph from core.operators import Constant, Operator from core.data_types import Uint32, QUANTIZED_NOT_PACKED from typing import cast @@ -35,28 +35,26 @@ def pass_remove_identities(graph: Graph) -> None: The input graph. It will be modified in-place. """ - p = Pattern("Identity") - matches = find_pattern(graph, p) + exec_list = [n for n in sort_graph(graph) if n.op_type == 'Identity'] to_be_removed = list() - - for m in matches: + for m in exec_list: """skip all identity.""" - in_op = m.node.input_ops['input'] - out_ops = m.node.output_ops['output'] + in_op = m.input_ops['input'] + out_ops = m.output_ops['output'] for out_op in out_ops: for k, v in out_op.input_ops.items(): - if v == m.node: + if v == m: # change the output's input to this identity's input out_op.add_input(k, in_op) # change the input's output to this identity's output for k2, v2 in in_op.output_ops.items(): - if m.node in v2: - v2.remove(m.node) + if m in v2: + v2.remove(m) v2.append(out_op) break break - to_be_removed.append(m.node) + to_be_removed.append(m) for op in to_be_removed: graph.remove_op(op) @@ -75,12 +73,11 @@ def pass_transpose(graph: Graph) -> None: The input graph. It will be modified in-place. 
""" - p = Pattern("*") - matches = find_pattern(graph, p) + exec_list = sort_graph(graph) - for m in matches: - dim = m.node.dimension - shape = m.node.shape + for m in exec_list: + dim = m.dimension + shape = m.shape if len(shape) != 4 or len(dim) != 4 or not set(dim).issubset({'N', 'H', 'W', 'C', 'I', 'O'}): continue @@ -88,7 +85,7 @@ def pass_transpose(graph: Graph) -> None: dim = dim.replace('O', 'N') permutation = list(map(lambda s: dim.index(s), 'NHWC')) - m.node.transpose(permutation) + m.transpose(permutation) def pass_constant_folding(graph: Graph) -> None: @@ -106,48 +103,47 @@ def pass_constant_folding(graph: Graph) -> None: done = False processed_nodes = [] while not done: - p = Pattern('*') - matches = find_pattern(graph, p) + exec_list = sort_graph(graph) processed_before_precompute = len(processed_nodes) to_be_removed = [] - for m in matches: - if m.node in processed_nodes: + for m in exec_list: + if m in processed_nodes: continue # We want operators with inputs - if not m.node.input_nodes: + if not m.input_nodes: continue precomputable = True - for input_node in m.node.input_nodes: + for input_node in m.input_nodes: if input_node.op_type != 'Constant': precomputable = False if not precomputable: continue - processed_nodes += m.node.input_nodes - processed_nodes.append(m.node) + processed_nodes += m.input_nodes + processed_nodes.append(m) - data = m.node.run_forward() + data = m.run_forward() new_constant = Constant( - m.node.name + '_new', - m.node.dtype, + m.name + '_new', + m.dtype, data, - dimension_format=m.node.dimension + dimension_format=m.dimension ) graph.add_op(new_constant) # get nodes to be removed after being disconnected - get_nodes_in_branch(m.node, None, to_be_removed) + get_nodes_in_branch(m, None, to_be_removed) - new_constant.add_outputs({'output': m.node.output_ops.values()}) - for output_name, consumer_list in m.node.output_ops.items(): + new_constant.add_outputs({'output': m.output_ops.values()}) + for output_name, consumer_list in m.output_ops.items(): for consumer_node in consumer_list: for input_name, input_node in consumer_node.input_ops.items(): - if input_node == m.node: + if input_node == m: consumer_node.add_input(input_name, new_constant) break @@ -177,8 +173,7 @@ def pass_propagate_quantization_details_into_conv(graph: Graph) -> None: graph : Graph The input graph. It will be modified in-place. 
""" - p = Pattern('*') - matches = find_pattern(graph, p) + exec_list = sort_graph(graph) qtypes = [ 'QTZ_binary_mean_scaling', 'QTZ_linear_mid_tread_half', @@ -186,29 +181,29 @@ def pass_propagate_quantization_details_into_conv(graph: Graph) -> None: ] quant_details = defaultdict(list) - for m in matches: - if not m.node.preserve_quantization: - quant_details[m.node.name] = [] + for m in exec_list: + if not m.preserve_quantization: + quant_details[m.name] = [] continue - if m.node.op_type == 'Conv': - input_node = m.node.input_nodes[0] - weight_node = m.node.input_nodes[1] + if m.op_type == 'Conv': + input_node = m.input_nodes[0] + weight_node = m.input_nodes[1] - m.node.a_quantizer = [input_node] if input_node.op_type in qtypes else quant_details[input_node.name] - m.node.quantizer = weight_node if weight_node.op_type in qtypes else quant_details[weight_node.name] + m.a_quantizer = [input_node] if input_node.op_type in qtypes else quant_details[input_node.name] + m.quantizer = weight_node if weight_node.op_type in qtypes else quant_details[weight_node.name] - quant_details[m.node.name] = [] + quant_details[m.name] = [] else: qtzs = [] - for n in m.node.input_nodes: + for n in m.input_nodes: if n.op_type in qtypes: qtzs.append(n) else: for q in quant_details[n.name]: qtzs.append(q) - quant_details[m.node.name] = qtzs if len(qtzs) == len(m.node.input_nodes) else [] + quant_details[m.name] = qtzs if len(qtzs) == len(m.input_nodes) else [] # TODO: check if the quantizers use same n_bits @@ -226,13 +221,11 @@ def pass_compute_thresholds(graph: Graph) -> None: graph : Graph The input graph. It will be modified in-place. """ - p = Pattern('QTZ_linear_mid_tread_half') - matches = find_pattern(graph, p) - + exec_list = [n for n in sort_graph(graph) if n.op_type == 'QTZ_linear_mid_tread_half'] to_be_removed = [] - for m in matches: + for m in exec_list: # find a a backward path between the quantizer and the convolution ie. a path represented by a list [Q, ..., C] - p = [m.node] + p = [m] while p[-1].op_type != 'Conv': non_variable_input = [inode for inode in p[-1].input_nodes if (not cast(Operator, inode).is_variable and inode.is_monotonic) @@ -340,8 +333,7 @@ def pass_pack_weights(graph: Graph) -> None: graph : Graph The input graph. It will be modified in-place. """ - p = Pattern('Conv') - matches = find_pattern(graph, p) + exec_list = [n for n in sort_graph(graph) if n.op_type == 'Conv'] quantization_types = [ 'QTZ_binary_mean_scaling', 'QTZ_linear_mid_tread_half', @@ -353,8 +345,8 @@ def pass_pack_weights(graph: Graph) -> None: packer = Packer(weight_bitwidth, word_size) to_be_removed = [] - for m in matches: - conv_node = m.node + for m in exec_list: + conv_node = m # check if this is a quantized convolution if not conv_node.quantizer or not conv_node.a_quantizer: @@ -406,11 +398,9 @@ def pass_quantize_convolutions(graph: Graph) -> None: graph : Graph The input graph. It will be modified in-place. """ - p = Pattern('Conv') - matches = find_pattern(graph, p) - - for m in matches: - conv_node = m.node + exec_list = [n for n in sort_graph(graph) if n.op_type == 'Conv'] + for m in exec_list: + conv_node = m # check if this is a quantized convolution if not conv_node.quantizer or not conv_node.a_quantizer: @@ -437,12 +427,10 @@ def pass_propagate_datatypes(graph) -> None: graph : Graph The input graph. It will be modified in-place. 
""" - p = Pattern('*') - matches = find_pattern(graph, p) - - for m in matches: - if m.node.op_type != 'Conv' and m.node.preserve_quantization: - m.node.dtype = m.node.input_nodes[0].dtype + exec_list = sort_graph(graph) + for m in exec_list: + if m.op_type != 'Conv' and m.preserve_quantization: + m.dtype = m.input_nodes[0].dtype def pass_propagate_output_type_backward(graph: Graph) -> None: @@ -459,8 +447,7 @@ def pass_propagate_output_type_backward(graph: Graph) -> None: graph : Graph The input graph. It will be modified in-place. """ - p = Pattern('*') - matches = find_pattern(graph, p) + exec_list = sort_graph(graph) def output_dtype_changer(node, otype): for n in node.input_nodes: @@ -470,6 +457,6 @@ def output_dtype_changer(node, otype): output_dtype_changer(n, otype) # propagate output data type to the last quantized convolution - output_node = matches[-1].node + output_node = exec_list[-1] output_type = output_node.dtype output_dtype_changer(output_node, output_type) From 406178db26d68036789383c57bf569eb9d797505 Mon Sep 17 00:00:00 2001 From: nlpng Date: Wed, 16 Jan 2019 19:31:20 +0900 Subject: [PATCH 34/45] Add new tests for optimization passes --- dlk/tests/test_optimizer.py | 914 +++++------------------------------- 1 file changed, 129 insertions(+), 785 deletions(-) diff --git a/dlk/tests/test_optimizer.py b/dlk/tests/test_optimizer.py index e05015346..0c0555cdf 100644 --- a/dlk/tests/test_optimizer.py +++ b/dlk/tests/test_optimizer.py @@ -27,880 +27,224 @@ from typing import Tuple -class TestOptimizer(unittest.TestCase): - """Test class for GraphRunner.""" +class TestPassTranspose(unittest.TestCase): + """Test class for transposing pass.""" - def test_precompute1(self) -> None: - """Test code for precompute optimizer.""" - data1 = np.random.rand(3, 2, 2, 3) - data2 = np.random.rand(3, 2, 2, 3) - data3 = np.random.rand(3, 2, 2, 3) - graph1 = self.create_sample_graph(data1, data2, data3) - graph2 = self.create_precompute_graph(data1, data2, data3) - - pass_remove_identities(graph1) - pass_transpose(graph1) - - pass_constant_folding(graph1) - - self.assertEqual(graph1, graph2, 'precompute failed.') - - print("Precompute test #1 passed!") - - def test_precompute2(self) -> None: - """Test code for precompute optimizer.""" - data1 = np.random.rand(3, 2, 2, 3) - data2 = np.random.rand(3, 2, 2, 3) - data3 = np.random.rand(3, 2, 2, 3) - graph1 = self.create_sample_graph(data1, data2, data3) - graph2, scaling1, scaling2 = self.create_quantized_graph(data1, data2, data3) - - pass_remove_identities(graph1) - pass_transpose(graph1) - - pass_propagate_quantization_details_into_conv(graph1) - pass_pack_weights(graph1) - pass_quantize_convolutions(graph1) - - pass_propagate_datatypes(graph1) - - pass_constant_folding(graph1) - - self.assertEqual(graph1, graph2, 'precompute failed.') - self.assertAlmostEqual(graph1.get_op('conv2').quantizer.scaling_factor, scaling2) # type: ignore - - print("Precompute test #2 passed!") - - def test_precompute3(self) -> None: - """Test code for precompute optimizer.""" - data1 = np.random.rand(3, 2, 2, 3) - data2 = np.random.rand(3, 2, 2, 3) - data3 = np.random.rand(3, 2, 2, 3) - graph1 = self.create_sample_graph3(data1, data2, data3) - graph2, scaling2, scaling3 = self.create_quantized_graph2(data1, data2, data3) - - pass_remove_identities(graph1) - pass_transpose(graph1) - - pass_propagate_quantization_details_into_conv(graph1) - pass_pack_weights(graph1) - pass_quantize_convolutions(graph1) - - pass_propagate_datatypes(graph1) - - 
pass_constant_folding(graph1) - - self.assertEqual(graph1, graph2, 'precompute failed.') - self.assertAlmostEqual(graph1.get_op('conv2').quantizer.scaling_factor, scaling2) # type: ignore - self.assertAlmostEqual(graph1.get_op('conv3').quantizer.scaling_factor, scaling3) # type: ignore - - print("Precompute test #3 passed!") - - def test_transpose_NHWC(self) -> None: - """Test code for transpose_NHWC optimizer.""" - data = np.random.rand(3, 2, 2, 1) - graph1 = self.create_sample_graph2(data) - graph2 = self.create_transposed_graph(data) + def test_pass_transpose(self) -> None: + """Test code for transposing optimizer pass.""" + data1 = np.random.rand(3, 2, 2, 1) + graph1 = self.create_sample_graph(data1) + graph2 = self.create_expected_graph(data1) pass_transpose(graph1) self.assertEqual(graph1, graph2, 'transpose to NHWC failed.') - print("Transpose_NHWC test #1 passed!") + print("Test transpose #1 pass passed!") - def create_sample_graph(self, data1: np.ndarray, data2: np.ndarray, data3: np.ndarray) -> Graph: + @staticmethod + def create_sample_graph(data: np.ndarray) -> Graph: graph = Graph() # input - x = Input( - 'placeholder', - [1, 5, 5, 3], - Float32(), - ) + x = Input('placeholder', [3, 5, 5, 1], Float32(), dimension_format='CWHN') # constant and internal nodes - w = Constant( - 'weight', - Float32(), - data1 - ) - - i = Identity( - 'identity1', - [3, 2, 2, 3], - Float32(), - {'input': w} - ) - - t = Transpose( - 'transpose1', - [3, 2, 2, 3], - Float32(), - {'data': i}, - perm=[3, 2, 1, 0] - ) - - q = QTZ_binary_mean_scaling( - 'qtz1', - [3, 2, 2, 3], - Float32(), - {'input': t} - ) + w = Constant('weight', Float32(), data, dimension_format='CWHN') + i1 = Identity('identity1', [3, 2, 2, 1], Float32(), {'input': w}, dimension_format='CWHN') + q = QTZ_binary_mean_scaling('qtz1', [3, 2, 2, 1], Float32(), {'input': i1}, dimension_format='CWHN') # Conv - conv1 = Conv( - 'conv1', - [1, 4, 4, 3], - Float32(), - {'X': x, 'W': q}, - kernel_shape=[2, 2] - ) - - i2 = Identity( - 'identity2', - [1, 4, 4, 3], - Float32(), - {'input': conv1} - ) - - s1 = Constant( - 'aq_const1', - Float32(), - np.array(1) - ) - - s2 = Constant( - 'aq_const2', - Float32(), - np.array(2) - ) - - aq = QTZ_linear_mid_tread_half( - 'aqtz1', - [1, 4, 4, 3], - Float32(), - {'X': i2, 'Y': s1, 'Z': s2} - ) - - dummy = Transpose( - 'dummy', - [1, 4, 4, 3], - Float32(), - {'data': aq}, - perm=[0, 1, 2, 3] - ) - - w2 = Constant( - 'weight2', - Float32(), - data2 - ) - - q2 = QTZ_binary_mean_scaling( - 'qtz2', - [3, 2, 2, 3], - Float32(), - {'input': w2} - ) - - conv2 = Conv( - 'conv2', - [1, 3, 3, 3], - Float32(), - {'X': dummy, 'W': q2}, - kernel_shape=[2, 2] - ) - - s3 = Constant( - 'aq_const1', - Float32(), - np.array(1) - ) - - s4 = Constant( - 'aq_const2', - Float32(), - np.array(2) - ) - - aq2 = QTZ_linear_mid_tread_half( - 'aqtz2', - [1, 3, 3, 3], - Float32(), - {'X': conv2, 'Y': s3, 'Z': s4} - ) - - w3 = Constant( - 'weight3', - Float32(), - data3 - ) - - i3 = Identity( - 'identity3', - [1, 3, 3, 3], - Float32(), - {'input': aq2} - ) - - conv3 = Conv( - 'conv3', - [1, 2, 2, 3], - Float32(), - {'X': i3, 'W': w3}, - kernel_shape=[2, 2] - ) + conv = Conv('conv', [3, 4, 4, 1], Float32(), {'X': x, 'W': q}, kernel_shape=[2, 2], dimension_format='CWHN') + + rs = Reshape('reshape', [1, 48], Float32(), {'data': conv}) # One output - y = Output( - 'output', - [1, 2, 2, 3], - Float32(), - {'input': conv3} - ) + y = Output('output', [1, 48], Float32(), {'input': rs},) # add ops to the graph graph.add_op_and_inputs(y) return 
graph - def binary_mean_scaling(self, data: np.ndarray) -> Tuple[np.float32, np.ndarray]: - return np.mean(np.abs(data)), np.sign(data).astype(np.float32) - - def create_precompute_graph(self, data1: np.ndarray, data2: np.ndarray, data3: np.ndarray) -> Graph: + @staticmethod + def create_expected_graph(data: np.ndarray) -> Graph: graph = Graph() - # two inputs - x = Input( - 'placeholder', - [1, 5, 5, 3], - Float32(), - ) + data = data.transpose([3, 2, 1, 0]) + + # input + x = Input('placeholder', [1, 5, 5, 3], Float32(), dimension_format='NHWC') - scaling1, qdata = self.binary_mean_scaling(data1.transpose([3, 2, 1, 0])) - w = Constant( - 'weight', - Float32(), - qdata * scaling1 - ) + # constant and internal nodes + w = Constant('weight', Float32(), data, dimension_format='NHWC') + i = Identity('identity1', [1, 2, 2, 3], Float32(), {'input': w}, dimension_format='NHWC') + q = QTZ_binary_mean_scaling('qtz1', [1, 2, 2, 3], Float32(), {'input': i}, dimension_format='NHWC') # Conv - conv1 = Conv( - 'conv1', - [1, 4, 4, 3], - Float32(), - {'X': x, 'W': w}, - kernel_shape=[2, 2] - ) - - s1 = Constant( - 'aq_const1', - Float32(), - np.array(1) - ) - - s2 = Constant( - 'aq_const2', - Float32(), - np.array(2) - ) - - aq = QTZ_linear_mid_tread_half( - 'aqtz1', - [1, 4, 4, 3], - Float32(), - {'X': conv1, 'Y': s1, 'Z': s2} - ) - - dummy = Transpose( - 'dummy', - [1, 4, 4, 3], - Float32(), - {'data': aq}, - perm=[0, 1, 2, 3] - ) - - scaling2, qdata2 = self.binary_mean_scaling(data2) - w2 = Constant( - 'weight2', - Float32(), - qdata2 * scaling2 - ) - - conv2 = Conv( - 'conv2', - [1, 3, 3, 3], - Float32(), - {'X': dummy, 'W': w2}, - kernel_shape=[2, 2] - ) - - s3 = Constant( - 'aq_const1', - Float32(), - np.array(1) - ) - - s4 = Constant( - 'aq_const2', - Float32(), - np.array(2) - ) - - aq2 = QTZ_linear_mid_tread_half( - 'aqtz2', - [1, 3, 3, 3], - Float32(), - {'X': conv2, 'Y': s3, 'Z': s4} - ) - - w3 = Constant( - 'weight3', - Float32(), - data3 - ) - - conv3 = Conv( - 'conv3', - [1, 2, 2, 3], - Float32(), - {'X': aq2, 'W': w3}, - kernel_shape=[2, 2] - ) + conv = Conv('conv', [1, 4, 4, 3], Float32(), {'X': x, 'W': q}, kernel_shape=[2, 2], dimension_format='NHWC') + + rs = Reshape('reshape', [1, 48], Float32(), {'data': conv}) # One output - y = Output( - 'output', - [1, 2, 2, 3], - Float32(), - {'input': conv3} - ) + y = Output('output', [1, 48], Float32(), {'input': rs},) # add ops to the graph graph.add_op_and_inputs(y) return graph - def create_quantized_graph(self, data: np.ndarray, data2: np.ndarray, data3: np.ndarray) \ - -> Tuple[Graph, np.float32, np.float32]: - graph = Graph() - - # two inputs - x = Input( - 'placeholder', - [1, 5, 5, 3], - Float32(), - ) - from modules.packer import Packer - packer = Packer(1, 32) - data = data.transpose([3, 2, 1, 0]) - scaling, qdata = self.binary_mean_scaling(data) - shape = list(data.shape) - w = Constant( - 'weight', - Float32(), - qdata * scaling, - ) - - q = QTZ_binary_mean_scaling( - 'qtz1', - shape, - Float32(), - {'input': w} - ) - q.scaling_factor = scaling +class TestPassRemoveIdentities(unittest.TestCase): + """Test class for removing identity pass.""" - # Conv - conv1 = Conv( - 'conv1', - [1, 4, 4, 3], - Float32(), - {'X': x, 'W': w}, - kernel_shape=[2, 2], - ) - - s1 = Constant( - 'aq_const1', - Float32(), - np.array(1) - ) - - s2 = Constant( - 'aq_const2', - Float32(), - np.array(2) - ) - - aq = QTZ_linear_mid_tread_half( - 'aqtz1', - [1, 4, 4, 3], - QUANTIZED_NOT_PACKED(), - {'X': conv1, 'Y': s1, 'Z': s2} - ) - - dummy = Transpose( - 
'dummy', - [1, 4, 4, 3], - QUANTIZED_NOT_PACKED(), - {'data': aq}, - perm=[0, 1, 2, 3] - ) - - scaling2, qdata2 = self.binary_mean_scaling(data2) - w2 = Constant( - 'weight2', - Uint32(), - packer.run(qdata2), - packed=True, - actual_shape=[3, 2, 2, 3] - ) - - # quantizer connected to conv2 as 'conv2.quantizer' - q2 = QTZ_binary_mean_scaling( - 'qtz2', - [3, 2, 2, 3], - Uint32(), - {'input': w2} - ) - q2.scaling_factor = scaling2 - - conv2 = Conv( - 'conv2', - [1, 3, 3, 3], - Float32(), - {'X': dummy, 'W': w2}, - kernel_shape=[2, 2], - quantized=True - ) - conv2.quantizer = q2 - - s3 = Constant( - 'aq_const1', - Float32(), - np.array(1) - ) - - s4 = Constant( - 'aq_const2', - Float32(), - np.array(2) - ) - - aq2 = QTZ_linear_mid_tread_half( - 'aqtz2', - [1, 3, 3, 3], - Float32(), - {'X': conv2, 'Y': s3, 'Z': s4} - ) - - w3 = Constant( - 'weight3', - Float32(), - data3 - ) - - conv3 = Conv( - 'conv3', - [1, 2, 2, 3], - Float32(), - {'X': aq2, 'W': w3}, - kernel_shape=[2, 2] - ) + def test_pass_remove_identities(self) -> None: + """Test code for removing identities optimizer pass.""" + data = np.random.rand(1, 2, 2, 3) + graph1 = self.create_sample_graph(data) + graph2 = self.create_expected_graph(data) - # One output - y = Output( - 'output', - [1, 2, 2, 3], - Float32(), - {'input': conv3} - ) + pass_remove_identities(graph1) - # add ops to the graph - graph.add_op_and_inputs(y) + self.assertEqual(graph1, graph2, 'remove identities failed.') - return graph, scaling, scaling2 + print("Test remove identities #2 pass passed!") - def create_sample_graph2(self, data: np.ndarray) -> Graph: + @staticmethod + def create_sample_graph(data: np.ndarray) -> Graph: graph = Graph() # input - x = Input( - 'placeholder', - [3, 5, 5, 1], - Float32(), - dimension_format='CWHN' - ) + x = Input('placeholder', [1, 5, 5, 3], Float32()) # constant and internal nodes - w = Constant( - 'weight', - Float32(), - data, - dimension_format='CWHN' - ) - - i = Identity( - 'identity1', - [3, 2, 2, 1], - Float32(), - {'input': w}, - dimension_format='CWHN' - ) - - q = QTZ_binary_mean_scaling( - 'qtz1', - [3, 2, 2, 1], - Float32(), - {'input': i}, - dimension_format='CWHN' - ) + w = Constant('weight', Float32(), data) + i1 = Identity('identity1', [1, 2, 2, 3], Float32(), {'input': w}) + q = QTZ_binary_mean_scaling('qtz1', [1, 2, 2, 3], Float32(), {'input': i1}) # Conv - conv = Conv( - 'conv', - [3, 4, 4, 1], - Float32(), - {'X': x, 'W': q}, - kernel_shape=[2, 2], - dimension_format='CWHN' - ) - - rs = Reshape( - 'reshape', - [1, 48], - Float32(), - {'data': conv} - ) + conv = Conv('conv', [1, 4, 4, 3], Float32(), {'X': x, 'W': q}, kernel_shape=[2, 2]) + + i2 = Identity('identity2', [1, 4, 4, 3], Float32(), {'input': conv}) + + rs = Reshape('reshape', [1, 48], Float32(), {'data': i2}) # One output - y = Output( - 'output', - [1, 48], - Float32(), - {'input': rs}, - ) + y = Output('output', [1, 48], Float32(), {'input': rs}, ) # add ops to the graph graph.add_op_and_inputs(y) return graph - def create_transposed_graph(self, data: np.ndarray) -> Graph: + @staticmethod + def create_expected_graph(data: np.ndarray) -> Graph: graph = Graph() - data = data.transpose([3, 2, 1, 0]) # input - x = Input( - 'placeholder', - [1, 5, 5, 3], - Float32(), - dimension_format='NHWC' - ) + x = Input('placeholder', [1, 5, 5, 3], Float32()) # constant and internal nodes - w = Constant( - 'weight', - Float32(), - data, - dimension_format='NHWC' - ) - - i = Identity( - 'identity1', - [1, 2, 2, 3], - Float32(), - {'input': w}, - 
dimension_format='NHWC' - ) - - q = QTZ_binary_mean_scaling( - 'qtz1', - [1, 2, 2, 3], - Float32(), - {'input': i}, - dimension_format='NHWC' - ) + w = Constant('weight', Float32(), data) + q = QTZ_binary_mean_scaling('qtz1', [1, 2, 2, 3], Float32(), {'input': w}) # Conv - conv = Conv( - 'conv', - [1, 4, 4, 3], - Float32(), - {'X': x, 'W': q}, - kernel_shape=[2, 2], - dimension_format='NHWC' - ) - - rs = Reshape( - 'reshape', - [1, 48], - Float32(), - {'data': conv} - ) + conv = Conv('conv', [1, 4, 4, 3], Float32(), {'X': x, 'W': q}, kernel_shape=[2, 2]) + + rs = Reshape('reshape', [1, 48], Float32(), {'data': conv}) # One output - y = Output( - 'output', - [1, 48], - Float32(), - {'input': rs}, - ) + y = Output('output', [1, 48], Float32(), {'input': rs},) # add ops to the graph graph.add_op_and_inputs(y) return graph - def create_sample_graph3(self, data1: np.ndarray, data2: np.ndarray, data3: np.ndarray) -> Graph: + +class TestPassPropagateQuantizationDetailsIntoConv(unittest.TestCase): + """Test class for propagating quantization details into conv.""" + def test_pass_propagate_quantization_details_into_conv(self) -> None: + """Test pass.""" + data1 = np.random.rand(1, 2, 2, 3) + data2 = np.random.rand(1, 2, 2, 3) + graph1 = self.create_sample_graph(data1, data2) + graph2 = self.create_expected_graph(data1, data2) + + pass_propagate_quantization_details_into_conv(graph1) + aq_g1 = graph1.get_op('conv2').a_quantizer + aq_g2 = graph2.get_op('conv2').a_quantizer + kq_g1 = graph1.get_op('conv2').quantizer + kq_g2 = graph2.get_op('conv2').quantizer + + self.assertEqual(len(aq_g1), len(aq_g2), '[Failed] Found number of activation quantizer not matched') + if aq_g1 and aq_g2: + self.assertEqual(aq_g1[0].op_type, aq_g2[0].op_type, + '[Failed] Found type of activation quantizer not matched') + self.assertEqual(kq_g1.op_type, kq_g2.op_type, '[Failed] Found type of kernel quantizer not matched') + self.assertEqual(graph1, graph2, '[Failed] Expected graph not matched') + + print("Test propagate_quantization_details_into_conv #3 pass passed!") + + @staticmethod + def create_sample_graph(data1: np.ndarray, data2: np.ndarray) -> Graph: graph = Graph() # input - x = Input( - 'placeholder', - [1, 5, 5, 3], - Float32(), - ) + x = Input('placeholder', [1, 5, 5, 3], Float32()) - # constant and internal nodes - w = Constant( - 'weight', - Float32(), - data1 - ) - - q = QTZ_binary_mean_scaling( - 'qtz1', - [3, 2, 2, 3], - Float32(), - {'input': w} - ) + # Conv1 + w1 = Constant('weight1', Float32(), data1) + conv1 = Conv('conv1', [1, 4, 4, 3], Float32(), {'X': x, 'W': w1}, kernel_shape=[2, 2]) - # Conv - conv1 = Conv( - 'conv1', - [1, 4, 4, 3], - Float32(), - {'X': x, 'W': q}, - kernel_shape=[2, 2] - ) - - i2 = Identity( - 'identity2', - [1, 4, 4, 3], - Float32(), - {'input': conv1} - ) - - s1 = Constant( - 'aq_const1', - Float32(), - np.array(1) - ) - - s2 = Constant( - 'aq_const2', - Float32(), - np.array(2) - ) - - aq = QTZ_linear_mid_tread_half( - 'aqtz1', - [1, 4, 4, 3], - Float32(), - {'X': i2, 'Y': s1, 'Z': s2} - ) - - w2 = Constant( - 'weight2', - Float32(), - data2 - ) - - q2 = QTZ_binary_mean_scaling( - 'qtz2', - [3, 2, 2, 3], - Float32(), - {'input': w2} - ) - - conv2 = Conv( - 'conv2', - [1, 3, 3, 3], - Float32(), - {'X': aq, 'W': q2}, - kernel_shape=[2, 2] - ) - - w3 = Constant( - 'weight3', - Float32(), - data3 - ) - - q3 = QTZ_binary_mean_scaling( - 'qtz3', - [3, 2, 2, 3], - Float32(), - {'input': w3} - ) - - conv3 = Conv( - 'conv3', - [1, 3, 3, 3], - Float32(), - {'X': aq, 'W': q3}, - 
kernel_shape=[2, 2] - ) - - y1 = Output( - 'output1', - [1, 3, 3, 3], - Float32(), - {'input': conv2} - ) - - y2 = Output( - 'output2', - [1, 3, 3, 3], - Float32(), - {'input': conv3} - ) + # activation quantizer + s1 = Constant('aq_const1', Float32(), np.array(1)) + s2 = Constant('aq_const2', Float32(), np.array(2)) + aq = QTZ_linear_mid_tread_half('aqtz1', [1, 4, 4, 3], Float32(), {'X': conv1, 'Y': s1, 'Z': s2}) + + # Conv2 + w2 = Constant('weight2', Float32(), data2) + kq = QTZ_binary_mean_scaling('kqtz1', [1, 2, 2, 3], Float32(), {'input': w2}) + conv2 = Conv('conv2', [1, 3, 3, 3], Float32(), {'X': aq, 'W': kq}, kernel_shape=[2, 2]) + + # One output + y = Output('output', [1, 3, 3, 3], Float32(), {'input': conv2}) # add ops to the graph - graph.add_op_and_inputs(y1) - graph.add_op_and_inputs(y2) + graph.add_op_and_inputs(y) return graph - def create_quantized_graph2(self, data1: np.ndarray, data2: np.ndarray, data3: np.ndarray) -> Graph: + @staticmethod + def create_expected_graph(data1: np.ndarray, data2: np.ndarray) -> Graph: graph = Graph() # input - x = Input( - 'placeholder', - [1, 5, 5, 3], - Float32(), - ) + x = Input('placeholder', [1, 5, 5, 3], Float32()) - # constant and internal nodes - scaling1, qdata1 = self.binary_mean_scaling(data1) - w = Constant( - 'weight', - Float32(), - qdata1 * scaling1 - ) - - q = QTZ_binary_mean_scaling( - 'qtz1', - [3, 2, 2, 3], - Float32(), - {'input': w} - ) + # Conv1 + w1 = Constant('weight1', Float32(), data1) + conv1 = Conv('conv1', [1, 4, 4, 3], Float32(), {'X': x, 'W': w1}, kernel_shape=[2, 2]) - # Conv - conv1 = Conv( - 'conv1', - [1, 4, 4, 3], - Float32(), - {'X': x, 'W': w}, - kernel_shape=[2, 2] - ) - - s1 = Constant( - 'aq_const1', - Float32(), - np.array(1) - ) - - s2 = Constant( - 'aq_const2', - Float32(), - np.array(2) - ) - - aq = QTZ_linear_mid_tread_half( - 'aqtz1', - [1, 4, 4, 3], - QUANTIZED_NOT_PACKED(), - {'X': conv1, 'Y': s1, 'Z': s2} - ) - - from modules.packer import Packer - packer = Packer(1, 32) - scaling2, qdata2 = self.binary_mean_scaling(data2) - w2 = Constant( - 'weight2', - Uint32(), - packer.run(qdata2), - packed=True, - actual_shape=[3, 2, 2, 3] - ) - - q2 = QTZ_binary_mean_scaling( - 'qtz2', - [3, 2, 2, 3], - Float32(), - {'input': w2} - ) - q2.scaling_factor = scaling2 - - conv2 = Conv( - 'conv2', - [1, 3, 3, 3], - Float32(), - {'X': aq, 'W': w2}, - kernel_shape=[2, 2], - quantized=True, - ) - conv2.quantizer = q2 - - scaling3, qdata3 = self.binary_mean_scaling(data3) - w3 = Constant( - 'weight2', - Uint32(), - packer.run(qdata3), - packed=True, - actual_shape=[3, 2, 2, 3] - ) - - q3 = QTZ_binary_mean_scaling( - 'qtz3', - [3, 2, 2, 3], - Float32(), - {'input': w3} - ) - q3.scaling_factor = scaling3 - - conv3 = Conv( - 'conv3', - [1, 3, 3, 3], - Float32(), - {'X': aq, 'W': w3}, - kernel_shape=[2, 2], - quantized=True - ) - conv3.quantizer = q3 - - y1 = Output( - 'output1', - [1, 3, 3, 3], - Float32(), - {'input': conv2} - ) - - y2 = Output( - 'output2', - [1, 3, 3, 3], - Float32(), - {'input': conv3} - ) + # activation quantizer + s1 = Constant('aq_const1', Float32(), np.array(1)) + s2 = Constant('aq_const2', Float32(), np.array(2)) + aq = QTZ_linear_mid_tread_half('aqtz1', [1, 4, 4, 3], Float32(), {'X': conv1, 'Y': s1, 'Z': s2}) + + # Conv2 + w2 = Constant('weight2', Float32(), data2) + kq = QTZ_binary_mean_scaling('kqtz1', [1, 2, 2, 3], Float32(), {'input': w2}) + conv2 = Conv('conv2', [1, 3, 3, 3], Float32(), {'X': aq, 'W': kq}, kernel_shape=[2, 2]) + conv2.a_quantizer = [aq] + conv2.quantizer = kq + + # 
One output + y = Output('output', [1, 3, 3, 3], Float32(), {'input': conv2}) # add ops to the graph - graph.add_op_and_inputs(y1) - graph.add_op_and_inputs(y2) + graph.add_op_and_inputs(y) - return graph, scaling2, scaling3 + return graph if __name__ == '__main__': From a2c51a14fc6c00db70dd81b804b0b5facb23b2c0 Mon Sep 17 00:00:00 2001 From: nlpng Date: Thu, 17 Jan 2019 08:49:45 +0900 Subject: [PATCH 35/45] just add more tests for more optimizatino pass --- dlk/tests/test_optimizer.py | 174 +++++++++++++++++++++++++++++++++++- 1 file changed, 173 insertions(+), 1 deletion(-) diff --git a/dlk/tests/test_optimizer.py b/dlk/tests/test_optimizer.py index 0c0555cdf..9a47b70cc 100644 --- a/dlk/tests/test_optimizer.py +++ b/dlk/tests/test_optimizer.py @@ -21,7 +21,8 @@ pass_quantize_convolutions, pass_propagate_datatypes, pass_propagate_output_type_backward from core.graph import Graph from core.operators import Add, AveragePool, BatchNormalization, Constant, Conv, Identity, Input, \ - MaxPool, Operator, Output, Transpose, QTZ_binary_mean_scaling, QTZ_linear_mid_tread_half, Reshape, Softmax + MaxPool, Operator, Output, Transpose, QTZ_binary_mean_scaling, QTZ_linear_mid_tread_half, Reshape, Softmax, \ + SpaceToDepth import numpy as np from typing import Tuple @@ -247,5 +248,176 @@ def create_expected_graph(data1: np.ndarray, data2: np.ndarray) -> Graph: return graph +class TestPassPackWeights(unittest.TestCase): + """Test class for packing weight.""" + def test_pass_pack_weights(self) -> None: + """Test pass.""" + data1 = np.float32(np.random.rand(1, 2, 2, 3)) + data2 = np.float32(np.random.rand(1, 2, 2, 3)) + graph1 = self.create_sample_graph(data1, data2) + + pass_pack_weights(graph1) + + self.assertEqual(graph1.get_op('conv2').input_ops['W'].op_type, 'Constant', + '[Failed] Found input kernel weights not a constant') + + print("Test pack_weights #4 pass passed!") + + @staticmethod + def create_sample_graph(data1: np.ndarray, data2: np.ndarray) -> Graph: + graph = Graph() + + # input + x = Input('placeholder', [1, 5, 5, 3], Float32()) + + # Conv1 + w1 = Constant('weight1', Float32(), data1) + conv1 = Conv('conv1', [1, 4, 4, 3], Float32(), {'X': x, 'W': w1}, kernel_shape=[2, 2]) + + # activation quantizer + s1 = Constant('aq_const1', Float32(), np.array(1)) + s2 = Constant('aq_const2', Float32(), np.array(2)) + aq = QTZ_linear_mid_tread_half('aqtz1', [1, 4, 4, 3], Float32(), {'X': conv1, 'Y': s1, 'Z': s2}) + + # Conv2 + w2 = Constant('weight2', Float32(), data2) + kq = QTZ_binary_mean_scaling('kqtz1', [1, 2, 2, 3], Float32(), {'input': w2}) + conv2 = Conv('conv2', [1, 3, 3, 3], Float32(), {'X': aq, 'W': kq}, kernel_shape=[2, 2]) + conv2.a_quantizer = [aq] + conv2.quantizer = kq + + # One output + y = Output('output', [1, 3, 3, 3], Float32(), {'input': conv2}) + + # add ops to the graph + graph.add_op_and_inputs(y) + + return graph + + +class TestPassQuantizeConvolutions(unittest.TestCase): + """Test class for packing weight.""" + def test_pass_quantize_convolutions(self) -> None: + """Test pass.""" + data1 = np.float32(np.random.rand(1, 2, 2, 3)) + data2 = np.float32(np.random.rand(1, 2, 2, 3)) + graph1 = self.create_sample_graph(data1, data2) + + pass_quantize_convolutions(graph1) + + self.assertEqual(graph1.get_op('aqtz1').dtype, QUANTIZED_NOT_PACKED(), + '[Failed] Found output dtype of activation quantizer not proper') + self.assertEqual(graph1.get_op('conv2').dtype, Float32(), + '[Failed] Found output dtype of conv not proper') + + print("Test quantize_convolutions #5 pass passed!") + + 
@staticmethod + def create_sample_graph(data1: np.ndarray, data2: np.ndarray) -> Graph: + graph = Graph() + + # input + x = Input('placeholder', [1, 5, 5, 3], Float32()) + + # Conv1 + w1 = Constant('weight1', Float32(), data1) + conv1 = Conv('conv1', [1, 4, 4, 3], Float32(), {'X': x, 'W': w1}, kernel_shape=[2, 2]) + + # activation quantizer + s1 = Constant('aq_const1', Float32(), np.array(1)) + s2 = Constant('aq_const2', Float32(), np.array(2)) + aq = QTZ_linear_mid_tread_half('aqtz1', [1, 4, 4, 3], Float32(), {'X': conv1, 'Y': s1, 'Z': s2}) + + # Conv2 + w2 = Constant('weight2', Float32(), data2) + kq = QTZ_binary_mean_scaling('kqtz1', [1, 2, 2, 3], Float32(), {'input': w2}) + conv2 = Conv('conv2', [1, 3, 3, 3], Float32(), {'X': aq, 'W': kq}, kernel_shape=[2, 2]) + conv2.a_quantizer = [aq] + conv2.quantizer = kq + + # One output + y = Output('output', [1, 3, 3, 3], Float32(), {'input': conv2}) + + # add ops to the graph + graph.add_op_and_inputs(y) + + return graph + + +class TestPassPropagateDatatypes(unittest.TestCase): + """Test class for packing weight.""" + def test_pass_propagate_datatypes(self) -> None: + """Test pass.""" + data1 = np.float32(np.random.rand(1, 2, 2, 3)) + graph1 = self.create_sample_graph(data1) + # graph2 = self.create_expected_graph(data1, data2) + + pass_propagate_datatypes(graph1) + + self.assertEqual(graph1.get_op('s2d').dtype, QUANTIZED_NOT_PACKED(), + '[Failed] Found dtype of SpaceToDepth not propagate correctly') + + print("Test propagate datatypes #6 pass passed!") + + @staticmethod + def create_sample_graph(data1: np.ndarray) -> Graph: + graph = Graph() + + # input + x = Input('placeholder', [1, 5, 5, 3], Float32()) + + # Conv1 + w1 = Constant('weight1', Float32(), data1) + conv1 = Conv('conv1', [1, 4, 4, 3], QUANTIZED_NOT_PACKED(), {'X': x, 'W': w1}, kernel_shape=[2, 2]) + + pool1 = SpaceToDepth('s2d', [1, 2, 2, 12], Float32(), {'input': conv1}) + + # One output + y = Output('output', [1, 2, 2, 12], Float32(), {'input': pool1}) + + # add ops to the graph + graph.add_op_and_inputs(y) + + return graph + + +class TestPassPropagateOutputTypeBackward(unittest.TestCase): + """Test class for packing weight.""" + def test_pass_propagate_output_type_backward(self) -> None: + """Test pass.""" + data1 = np.float32(np.random.rand(1, 2, 2, 3)) + graph1 = self.create_sample_graph(data1) + # graph2 = self.create_expected_graph(data1, data2) + + pass_propagate_output_type_backward(graph1) + + self.assertEqual(graph1.get_op('conv1').dtype, Float32(), + '[Failed] Found dtype of SpaceToDepth not propagate correctly') + + print("Test propagate output type backward #7 pass passed!") + + @staticmethod + def create_sample_graph(data1: np.ndarray) -> Graph: + graph = Graph() + + # input + x = Input('placeholder', [1, 5, 5, 3], Float32()) + + # Conv1 + w1 = Constant('weight1', Float32(), data1) + conv1 = Conv('conv1', [1, 4, 4, 3], QUANTIZED_NOT_PACKED(), {'X': x, 'W': w1}, kernel_shape=[2, 2]) + conv1.is_quantized = True + + pool1 = SpaceToDepth('s2d', [1, 2, 2, 12], Float32(), {'input': conv1}) + + # One output + y = Output('output', [1, 2, 2, 12], Float32(), {'input': pool1}) + + # add ops to the graph + graph.add_op_and_inputs(y) + + return graph + + if __name__ == '__main__': unittest.main() From aa5bb3d1351cfd7397d3507654f4f94cf268436e Mon Sep 17 00:00:00 2001 From: nlpng Date: Thu, 17 Jan 2019 10:59:12 +0900 Subject: [PATCH 36/45] Add more more tests for optimization pass --- dlk/tests/test_optimizer.py | 64 +++++++++++++++++++++++++++++++++++++ 1 file changed, 64 
insertions(+) diff --git a/dlk/tests/test_optimizer.py b/dlk/tests/test_optimizer.py index 9a47b70cc..2f912691a 100644 --- a/dlk/tests/test_optimizer.py +++ b/dlk/tests/test_optimizer.py @@ -419,5 +419,69 @@ def create_sample_graph(data1: np.ndarray) -> Graph: return graph +class TestPassComputeThresholds(unittest.TestCase): + """Test class for packing weight.""" + def test_pass_compute_thresholds(self) -> None: + """Test pass.""" + data1 = np.float32(np.random.rand(1, 2, 2, 3)) + data2 = np.float32(np.random.rand(1, 2, 2, 3)) + graph1 = self.create_sample_graph(data1, data2) + # graph2 = self.create_expected_graph(data1, data2) + + pass_compute_thresholds(graph1) + + self.assertEqual(graph1.get_op('conv2').has_thresholds, True, + '[Failed] Found threshold of Conv not calculated') + + print("Test compute_thresholds #8 pass passed!") + + @staticmethod + def create_sample_graph(data1: np.ndarray, data2: np.ndarray) -> Graph: + graph = Graph() + + # input + x = Input('placeholder', [1, 5, 5, 3], Float32()) + + # Conv1 + w1 = Constant('weight1', Float32(), data1) + conv1 = Conv('conv1', [1, 4, 4, 3], Float32(), {'X': x, 'W': w1}, kernel_shape=[2, 2]) + + # activation quantizer + s1 = Constant('aq_const1', Int32(), np.array([2], dtype=np.int32)) + s2 = Constant('aq_const2', Float32(), np.array([2.0], dtype=np.float32)) + aq1 = QTZ_linear_mid_tread_half('aqtz1', [1, 4, 4, 3], Float32(), {'X': conv1, 'Y': s1, 'Z': s2}) + + # Conv2 + w2 = Constant('weight2', Float32(), data2) + kq = QTZ_binary_mean_scaling('kqtz1', [1, 2, 2, 3], Float32(), {'input': w2}) + conv2 = Conv('conv2', [1, 3, 3, 3], Float32(), {'X': aq1, 'W': kq}, kernel_shape=[2, 2]) + conv2.a_quantizer = [aq1] + conv2.quantizer = kq + conv2.is_quantized = True + + sc = Constant('bn_scale', Float32(), np.random.rand(3)) + be = Constant('bn_b', Float32(), np.random.rand(3)) + mu = Constant('bn_mu', Float32(), np.random.rand(3)) + va = Constant('bn_var', Float32(), np.random.rand(3)) + bn = BatchNormalization('bn', [1, 3, 3, 3], Float32(), {'X': conv2, + 'scale': sc, + 'B': be, + 'mean': mu, + 'var': va}) + + # activation quantizer + s3 = Constant('aq_const3', Int32(), np.array([2], dtype=np.int32)) + s4 = Constant('aq_const4', Float32(), np.array([2.0], dtype=np.float32)) + aq2 = QTZ_linear_mid_tread_half('aqtz2', [1, 3, 3, 3], Float32(), {'X': bn, 'Y': s3, 'Z': s4}) + + # One output + y = Output('output', [1, 3, 3, 3], Float32(), {'input': aq2}) + + # add ops to the graph + graph.add_op_and_inputs(y) + + return graph + + if __name__ == '__main__': unittest.main() From d3b496e99b9510764a7f1db1e0d70cfcc2b06278 Mon Sep 17 00:00:00 2001 From: nlpng Date: Thu, 17 Jan 2019 13:23:28 +0900 Subject: [PATCH 37/45] Add last test for constant folding pass --- dlk/tests/test_optimizer.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/dlk/tests/test_optimizer.py b/dlk/tests/test_optimizer.py index 2f912691a..819760bd7 100644 --- a/dlk/tests/test_optimizer.py +++ b/dlk/tests/test_optimizer.py @@ -483,5 +483,38 @@ def create_sample_graph(data1: np.ndarray, data2: np.ndarray) -> Graph: return graph +class TestPassConstantFolding(unittest.TestCase): + """Test class for packing weight.""" + def test_pass_constant_folding(self) -> None: + """Test pass.""" + graph1 = self.create_sample_graph() + + pass_constant_folding(graph1) + + self.assertEqual(set(graph1.get_op('potatoes_new').data), set(np.array([2, 5])), + '[Failed] Found folded constant not correct') + + print("Test constant folding #9 pass passed!") + + 
@staticmethod + def create_sample_graph() -> Graph: + graph = Graph() + + x = Input('placeholder', [2], Float32()) + + s1 = Constant('potato_1', Float32(), np.array([1, 2])) + s2 = Constant('potato_2', Float32(), np.array([1, 3])) + add1 = Add('potatoes', [2], Float32(), {'A': s1, 'B': s2}) + add2 = Add('more_potatoes', [2], Float32(), {'A': x, 'B': add1}) + + # One output + y = Output('output', [2], Float32(), {'input': add2}) + + # add ops to the graph + graph.add_op_and_inputs(y) + + return graph + + if __name__ == '__main__': unittest.main() From ebc467931aa9286368bccedc567782810e36020c Mon Sep 17 00:00:00 2001 From: nlpng Date: Thu, 17 Jan 2019 14:23:51 +0900 Subject: [PATCH 38/45] Add functions comments for run and de_run --- dlk/python/dlk/core/operators.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/dlk/python/dlk/core/operators.py b/dlk/python/dlk/core/operators.py index e54090d4f..ff9460ea9 100644 --- a/dlk/python/dlk/core/operators.py +++ b/dlk/python/dlk/core/operators.py @@ -1289,6 +1289,11 @@ def _check_consistency(self) -> None: self._assert(x_shape == self.shape, message) def run(self, **kwargs) -> Dict: + """Return the forward calculation results of batch normalization. + + Currently this function is only used by threshold skipping optimization pass + for recursively calculating thresholds of the skipping patterns. + """ scale = np.float64(self._input_ops['scale'].data) beta = np.float64(self._input_ops['B'].data) mean = np.float64(self._input_ops['mean'].data) @@ -1299,6 +1304,11 @@ def run(self, **kwargs) -> Dict: return kwargs def de_run(self, **kwargs) -> Dict: + """Return the reversed calculation results of batch normalization. + + Currently this function is only used by threshold skipping optimization pass + for recursively calculating thresholds of the skipping patterns. + """ scale = np.float64(self._input_ops['scale'].data) beta = np.float64(self._input_ops['B'].data) mean = np.float64(self._input_ops['mean'].data) @@ -1370,6 +1380,11 @@ def _check_consistency(self) -> None: self._assert(x_shape == self.shape, message) def run(self, **kwargs) -> Dict: + """Return the result of forward calculation of an activation quantizer. + + Currently this function is only used by threshold skipping optimization pass + for recursively calculating thresholds of the skipping patterns. + """ bit = self._input_ops['Y'].data max_value = np.float64(self._input_ops['Z'].data) in_data = np.float64(kwargs['data']) @@ -1380,6 +1395,11 @@ def run(self, **kwargs) -> Dict: return kwargs def de_run(self, **kwargs) -> Dict: + """Return the result of reversed calculation of an activation quantizer. + + Currently this function is only used by threshold skipping optimization pass + for recursively calculating thresholds of the skipping patterns. + """ bit = self._input_ops['Y'].data max_value = np.float64(self._input_ops['Z'].data) in_data = np.float64(kwargs['data']) @@ -1389,6 +1409,10 @@ def de_run(self, **kwargs) -> Dict: return kwargs def run_forward(self) -> np.ndarray: + """General function for this quantization operator. + + This function returns numpy array. 
+ """ data_dict = self.run(data=self._input_ops['X'].data) self._data = data_dict['data'] return self._data From 9b5c66b1a001623c18ad1e6493ad5d272dc6d7ca Mon Sep 17 00:00:00 2001 From: nlpng Date: Thu, 17 Jan 2019 14:32:48 +0900 Subject: [PATCH 39/45] Remove redundant codes and comments --- dlk/tests/test_optimizer.py | 41 +++++++++++++++---------------------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/dlk/tests/test_optimizer.py b/dlk/tests/test_optimizer.py index 819760bd7..3c250ccfe 100644 --- a/dlk/tests/test_optimizer.py +++ b/dlk/tests/test_optimizer.py @@ -25,23 +25,21 @@ SpaceToDepth import numpy as np -from typing import Tuple class TestPassTranspose(unittest.TestCase): """Test class for transposing pass.""" - def test_pass_transpose(self) -> None: """Test code for transposing optimizer pass.""" - data1 = np.random.rand(3, 2, 2, 1) - graph1 = self.create_sample_graph(data1) - graph2 = self.create_expected_graph(data1) + data = np.random.rand(3, 2, 2, 1) + graph1 = self.create_sample_graph(data) + graph2 = self.create_expected_graph(data) pass_transpose(graph1) self.assertEqual(graph1, graph2, 'transpose to NHWC failed.') - print("Test transpose #1 pass passed!") + print("Test pass #1 transpose passed!") @staticmethod def create_sample_graph(data: np.ndarray) -> Graph: @@ -58,9 +56,8 @@ def create_sample_graph(data: np.ndarray) -> Graph: # Conv conv = Conv('conv', [3, 4, 4, 1], Float32(), {'X': x, 'W': q}, kernel_shape=[2, 2], dimension_format='CWHN') - rs = Reshape('reshape', [1, 48], Float32(), {'data': conv}) - # One output + rs = Reshape('reshape', [1, 48], Float32(), {'data': conv}) y = Output('output', [1, 48], Float32(), {'input': rs},) # add ops to the graph @@ -98,7 +95,6 @@ def create_expected_graph(data: np.ndarray) -> Graph: class TestPassRemoveIdentities(unittest.TestCase): """Test class for removing identity pass.""" - def test_pass_remove_identities(self) -> None: """Test code for removing identities optimizer pass.""" data = np.random.rand(1, 2, 2, 3) @@ -109,7 +105,7 @@ def test_pass_remove_identities(self) -> None: self.assertEqual(graph1, graph2, 'remove identities failed.') - print("Test remove identities #2 pass passed!") + print("Test pass #2 remove identities passed!") @staticmethod def create_sample_graph(data: np.ndarray) -> Graph: @@ -126,12 +122,10 @@ def create_sample_graph(data: np.ndarray) -> Graph: # Conv conv = Conv('conv', [1, 4, 4, 3], Float32(), {'X': x, 'W': q}, kernel_shape=[2, 2]) + # One output i2 = Identity('identity2', [1, 4, 4, 3], Float32(), {'input': conv}) - rs = Reshape('reshape', [1, 48], Float32(), {'data': i2}) - - # One output - y = Output('output', [1, 48], Float32(), {'input': rs}, ) + y = Output('output', [1, 48], Float32(), {'input': rs},) # add ops to the graph graph.add_op_and_inputs(y) @@ -152,9 +146,8 @@ def create_expected_graph(data: np.ndarray) -> Graph: # Conv conv = Conv('conv', [1, 4, 4, 3], Float32(), {'X': x, 'W': q}, kernel_shape=[2, 2]) - rs = Reshape('reshape', [1, 48], Float32(), {'data': conv}) - # One output + rs = Reshape('reshape', [1, 48], Float32(), {'data': conv}) y = Output('output', [1, 48], Float32(), {'input': rs},) # add ops to the graph @@ -185,7 +178,7 @@ def test_pass_propagate_quantization_details_into_conv(self) -> None: self.assertEqual(kq_g1.op_type, kq_g2.op_type, '[Failed] Found type of kernel quantizer not matched') self.assertEqual(graph1, graph2, '[Failed] Expected graph not matched') - print("Test propagate_quantization_details_into_conv #3 pass passed!") + print("Test 
pass #3 propagate_quantization_details_into_conv passed!") @staticmethod def create_sample_graph(data1: np.ndarray, data2: np.ndarray) -> Graph: @@ -261,7 +254,7 @@ def test_pass_pack_weights(self) -> None: self.assertEqual(graph1.get_op('conv2').input_ops['W'].op_type, 'Constant', '[Failed] Found input kernel weights not a constant') - print("Test pack_weights #4 pass passed!") + print("Test pass #4 pack_weights passed!") @staticmethod def create_sample_graph(data1: np.ndarray, data2: np.ndarray) -> Graph: @@ -310,7 +303,7 @@ def test_pass_quantize_convolutions(self) -> None: self.assertEqual(graph1.get_op('conv2').dtype, Float32(), '[Failed] Found output dtype of conv not proper') - print("Test quantize_convolutions #5 pass passed!") + print("Test pass #5 quantize_convolutions passed!") @staticmethod def create_sample_graph(data1: np.ndarray, data2: np.ndarray) -> Graph: @@ -357,7 +350,7 @@ def test_pass_propagate_datatypes(self) -> None: self.assertEqual(graph1.get_op('s2d').dtype, QUANTIZED_NOT_PACKED(), '[Failed] Found dtype of SpaceToDepth not propagate correctly') - print("Test propagate datatypes #6 pass passed!") + print("Test pass #6 propagate data types passed!") @staticmethod def create_sample_graph(data1: np.ndarray) -> Graph: @@ -387,14 +380,13 @@ def test_pass_propagate_output_type_backward(self) -> None: """Test pass.""" data1 = np.float32(np.random.rand(1, 2, 2, 3)) graph1 = self.create_sample_graph(data1) - # graph2 = self.create_expected_graph(data1, data2) pass_propagate_output_type_backward(graph1) self.assertEqual(graph1.get_op('conv1').dtype, Float32(), '[Failed] Found dtype of SpaceToDepth not propagate correctly') - print("Test propagate output type backward #7 pass passed!") + print("Test pass #7 propagate output type backward passed!") @staticmethod def create_sample_graph(data1: np.ndarray) -> Graph: @@ -426,14 +418,13 @@ def test_pass_compute_thresholds(self) -> None: data1 = np.float32(np.random.rand(1, 2, 2, 3)) data2 = np.float32(np.random.rand(1, 2, 2, 3)) graph1 = self.create_sample_graph(data1, data2) - # graph2 = self.create_expected_graph(data1, data2) pass_compute_thresholds(graph1) self.assertEqual(graph1.get_op('conv2').has_thresholds, True, '[Failed] Found threshold of Conv not calculated') - print("Test compute_thresholds #8 pass passed!") + print("Test pass #8 compute_thresholds passed!") @staticmethod def create_sample_graph(data1: np.ndarray, data2: np.ndarray) -> Graph: @@ -494,7 +485,7 @@ def test_pass_constant_folding(self) -> None: self.assertEqual(set(graph1.get_op('potatoes_new').data), set(np.array([2, 5])), '[Failed] Found folded constant not correct') - print("Test constant folding #9 pass passed!") + print("Test pass #9 constant folding passed!") @staticmethod def create_sample_graph() -> Graph: From 4b006f9ced893729fef34782f8eed6f2d3235799 Mon Sep 17 00:00:00 2001 From: nlpng Date: Fri, 18 Jan 2019 09:18:09 +0900 Subject: [PATCH 40/45] just move things around --- dlk/tests/test_optimizer.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/dlk/tests/test_optimizer.py b/dlk/tests/test_optimizer.py index 3c250ccfe..ad9e02f8e 100644 --- a/dlk/tests/test_optimizer.py +++ b/dlk/tests/test_optimizer.py @@ -76,15 +76,14 @@ def create_expected_graph(data: np.ndarray) -> Graph: # constant and internal nodes w = Constant('weight', Float32(), data, dimension_format='NHWC') - i = Identity('identity1', [1, 2, 2, 3], Float32(), {'input': w}, dimension_format='NHWC') - q = QTZ_binary_mean_scaling('qtz1', [1, 2, 2, 3], 
Float32(), {'input': i}, dimension_format='NHWC') + i1 = Identity('identity1', [1, 2, 2, 3], Float32(), {'input': w}, dimension_format='NHWC') + q = QTZ_binary_mean_scaling('qtz1', [1, 2, 2, 3], Float32(), {'input': i1}, dimension_format='NHWC') # Conv conv = Conv('conv', [1, 4, 4, 3], Float32(), {'X': x, 'W': q}, kernel_shape=[2, 2], dimension_format='NHWC') - rs = Reshape('reshape', [1, 48], Float32(), {'data': conv}) - # One output + rs = Reshape('reshape', [1, 48], Float32(), {'data': conv}) y = Output('output', [1, 48], Float32(), {'input': rs},) # add ops to the graph From ca65f4ec73ad368f8aa65d11c1911e82fa593930 Mon Sep 17 00:00:00 2001 From: nlpng Date: Tue, 29 Jan 2019 12:15:50 +0900 Subject: [PATCH 41/45] add new assertion check for quantize convolution pass --- dlk/python/dlk/core/optimizer.py | 2 +- dlk/tests/test_optimizer.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index 7b4484783..e9365a507 100644 --- a/dlk/python/dlk/core/optimizer.py +++ b/dlk/python/dlk/core/optimizer.py @@ -414,7 +414,7 @@ def pass_quantize_convolutions(graph: Graph) -> None: conv_node.dtype = QUANTIZED_NOT_PACKED() # change the output data type of the quantizers - conv_node.quantizer.dtype = Uint32 + conv_node.quantizer.dtype = Uint32() for qtz in conv_node.a_quantizer: qtz.dtype = QUANTIZED_NOT_PACKED() diff --git a/dlk/tests/test_optimizer.py b/dlk/tests/test_optimizer.py index ad9e02f8e..28a1b2f82 100644 --- a/dlk/tests/test_optimizer.py +++ b/dlk/tests/test_optimizer.py @@ -299,6 +299,8 @@ def test_pass_quantize_convolutions(self) -> None: self.assertEqual(graph1.get_op('aqtz1').dtype, QUANTIZED_NOT_PACKED(), '[Failed] Found output dtype of activation quantizer not proper') + self.assertEqual(graph1.get_op('kqtz1').dtype, Uint32(), + '[Failed] Found output dtype of kernel quantizer not proper') self.assertEqual(graph1.get_op('conv2').dtype, Float32(), '[Failed] Found output dtype of conv not proper') From 39b86783e62104ae7ecea895145dadbacd8b57a5 Mon Sep 17 00:00:00 2001 From: nlpng Date: Tue, 29 Jan 2019 12:32:48 +0900 Subject: [PATCH 42/45] add test for graph has no kernel quantizer --- dlk/tests/test_optimizer.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/dlk/tests/test_optimizer.py b/dlk/tests/test_optimizer.py index 28a1b2f82..ed625e072 100644 --- a/dlk/tests/test_optimizer.py +++ b/dlk/tests/test_optimizer.py @@ -247,12 +247,16 @@ def test_pass_pack_weights(self) -> None: data1 = np.float32(np.random.rand(1, 2, 2, 3)) data2 = np.float32(np.random.rand(1, 2, 2, 3)) graph1 = self.create_sample_graph(data1, data2) - pass_pack_weights(graph1) - self.assertEqual(graph1.get_op('conv2').input_ops['W'].op_type, 'Constant', '[Failed] Found input kernel weights not a constant') + graph_2_1 = self.create_sample_graph_2(data1) + graph_2_2 = self.create_sample_graph_2(data1) + pass_pack_weights(graph_2_2) + self.assertEqual(graph_2_1, graph_2_2, + '[Failed] Found optimized graph not the same') + print("Test pass #4 pack_weights passed!") @staticmethod @@ -286,6 +290,24 @@ def create_sample_graph(data1: np.ndarray, data2: np.ndarray) -> Graph: return graph + @staticmethod + def create_sample_graph_2(data1: np.ndarray) -> Graph: + graph = Graph() + + # input + x = Input('placeholder', [1, 5, 5, 3], Float32()) + + # Conv1 + w1 = Constant('weight1', Float32(), data1) + conv1 = Conv('conv1', [1, 4, 4, 3], Float32(), {'X': x, 'W': w1}, kernel_shape=[2, 2]) + + y = 
Output('output', [1, 4, 4, 3], Float32(), {'input': conv1}) + + # add ops to the graph + graph.add_op_and_inputs(y) + + return graph + class TestPassQuantizeConvolutions(unittest.TestCase): """Test class for packing weight.""" From b5e67a810763d41f4e7324a2f7f98b4aa3820568 Mon Sep 17 00:00:00 2001 From: nlpng Date: Tue, 29 Jan 2019 12:44:41 +0900 Subject: [PATCH 43/45] add test for graph has no weights --- dlk/tests/test_optimizer.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dlk/tests/test_optimizer.py b/dlk/tests/test_optimizer.py index ed625e072..6647abad9 100644 --- a/dlk/tests/test_optimizer.py +++ b/dlk/tests/test_optimizer.py @@ -246,6 +246,7 @@ def test_pass_pack_weights(self) -> None: """Test pass.""" data1 = np.float32(np.random.rand(1, 2, 2, 3)) data2 = np.float32(np.random.rand(1, 2, 2, 3)) + graph1 = self.create_sample_graph(data1, data2) pass_pack_weights(graph1) self.assertEqual(graph1.get_op('conv2').input_ops['W'].op_type, 'Constant', @@ -301,7 +302,10 @@ def create_sample_graph_2(data1: np.ndarray) -> Graph: w1 = Constant('weight1', Float32(), data1) conv1 = Conv('conv1', [1, 4, 4, 3], Float32(), {'X': x, 'W': w1}, kernel_shape=[2, 2]) - y = Output('output', [1, 4, 4, 3], Float32(), {'input': conv1}) + s1 = Constant('const1', Float32(), np.zeros([1, 4, 4, 3])) + add1 = Add('add', [1, 4, 4, 3], Float32(), {'A': conv1, 'B': s1}) + + y = Output('output', [1, 4, 4, 3], Float32(), {'input': add1}) # add ops to the graph graph.add_op_and_inputs(y) From 82a72c74d956ba08bb97087c70a215c602f10157 Mon Sep 17 00:00:00 2001 From: nlpng Date: Tue, 29 Jan 2019 12:50:06 +0900 Subject: [PATCH 44/45] fix the copyright year --- dlk/python/dlk/core/graph_pattern_matching.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dlk/python/dlk/core/graph_pattern_matching.py b/dlk/python/dlk/core/graph_pattern_matching.py index 6eb328746..79a54757c 100644 --- a/dlk/python/dlk/core/graph_pattern_matching.py +++ b/dlk/python/dlk/core/graph_pattern_matching.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright 2018 The Blueoil Authors. All Rights Reserved. +# Copyright 2019 The Blueoil Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From d216594933599112eebde063bafff48f916a8925 Mon Sep 17 00:00:00 2001 From: nlpng Date: Tue, 29 Jan 2019 13:26:24 +0900 Subject: [PATCH 45/45] take care the negative values --- dlk/python/dlk/core/optimizer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index e9365a507..433e7ddb9 100644 --- a/dlk/python/dlk/core/optimizer.py +++ b/dlk/python/dlk/core/optimizer.py @@ -293,7 +293,8 @@ def pass_compute_thresholds(graph: Graph) -> None: else int(math.ceil(th_per_ch)) # take care of threshold values that are larger than 13-bit signed integer - threshold_table[abs(threshold_table) > max_th_value] = max_th_value + threshold_table[threshold_table > max_th_value] = max_th_value + threshold_table[threshold_table < -max_th_value] = -max_th_value for c in range(ch): threshold_table[c, -1] = 1 \
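
The final patch above replaces the single abs()-based clamp in pass_compute_thresholds with a two-sided clip, so large negative thresholds keep their sign instead of being overwritten with the positive limit. A minimal numpy sketch of the difference, assuming max_th_value is the 13-bit signed maximum (4095) and using made-up threshold values (the real table is produced earlier in the pass):

    import numpy as np

    max_th_value = 2 ** 12 - 1                        # assumed 13-bit signed maximum
    threshold_table = np.array([10, 5000, -5000])     # illustrative values only

    # Old line: abs() selects both signs, so -5000 is overwritten with +4095.
    old = threshold_table.copy()
    old[abs(old) > max_th_value] = max_th_value       # -> [10, 4095, 4095]

    # Patched lines: each side is clipped to its own bound, keeping the sign.
    new = threshold_table.copy()
    new[new > max_th_value] = max_th_value
    new[new < -max_th_value] = -max_th_value          # -> [10, 4095, -4095]

np.clip(threshold_table, -max_th_value, max_th_value) would give the same result; the explicit masks simply mirror the assignment style already used in the pass.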