From ab184de943a78fa5c6cc26facf567c4b887b03e5 Mon Sep 17 00:00:00 2001 From: Antonio Date: Thu, 29 Nov 2018 11:53:52 +0900 Subject: [PATCH 01/45] [WIP] Added pattern matching and new optimization passes --- dlk/python/dlk/core/graph.py | 4 +- dlk/python/dlk/core/graph_pattern_matching.py | 119 +++++++++ dlk/python/dlk/core/operators.py | 72 ++++++ dlk/python/dlk/core/optimizer.py | 3 +- dlk/python/dlk/plugins/tf.py | 29 ++- dlk/python/dlk/scripts/generate_project.py | 230 ++++++++++++++++++ dlk/python/dlk/templates/Makefile.tpl | 8 +- 7 files changed, 447 insertions(+), 18 deletions(-) create mode 100644 dlk/python/dlk/core/graph_pattern_matching.py diff --git a/dlk/python/dlk/core/graph.py b/dlk/python/dlk/core/graph.py index a76cf3367..0ff8fc0c4 100644 --- a/dlk/python/dlk/core/graph.py +++ b/dlk/python/dlk/core/graph.py @@ -93,9 +93,9 @@ def remove_op(self, op: Operator) -> None: del self.__op_type_list[t][i] @property - def operartors(self) -> List[Operator]: + def operators(self) -> List[Operator]: """List up all operators in this graph.""" - return list(self.__ops.keys()) + return list(self.__ops.values()) def get_inputs(self) -> List[Operator]: return list(self.__op_type_list['Input']) diff --git a/dlk/python/dlk/core/graph_pattern_matching.py b/dlk/python/dlk/core/graph_pattern_matching.py new file mode 100644 index 000000000..8ae3840a2 --- /dev/null +++ b/dlk/python/dlk/core/graph_pattern_matching.py @@ -0,0 +1,119 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 The Blueoil Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= +"""Graph pattern matching module.""" + +from core.operators import Operator +from core.graph import Graph + + +class Pattern: + def __init__(self, op=str(), inputs=list()): + self.op = op + self.inputs = inputs + + +class NodeMatch: + def __init__(self): + self.node = None + self.inputs = list() + + +def sort_graph(graph, exec_list): + for node in graph.operators: + node.visited = False + + input_nodes = list() + for node in graph.operators: + input_nodes += [n.name for n in node.input_nodes] + + output_nodes = list() + for node in graph.operators: + if node not in input_nodes: + output_nodes.append(node) + + for node in output_nodes: + top_order(node, exec_list) + + +def top_order(output_node, exec_list): + if output_node.visited: + return + for input_node in output_node.input_nodes: + top_order(input_node, exec_list) + + exec_list.append(output_node) + output_node.visited = True + + +def match_to_execution_list(match, execution_list): + for input_node in match.inputs: + match_to_execution_list(input_node, execution_list) + execution_list.append(match.node) + + +class GraphMatcher: + def __init__(self, input_graph=Graph()): + self.graph_node_list = list() + sort_graph(input_graph, self.graph_node_list) + + self._node_map = {node.name: node for node in self.graph_node_list} + + def record_matched_nodes(self, match, matched_nodes): + matched_nodes.add(match.node.name) + for input_node in match.inputs: + self.record_matched_nodes(input_node, matched_nodes) + + def get_op_type_matches(self, pattern, matches): + matched_nodes = set() + for node in self.graph_node_list: + if node in matched_nodes: + continue + + match = NodeMatch() + if self.does_op_type_match(node, pattern, matched_nodes, match): + self.record_matched_nodes(match, matched_nodes) + matches.append(match) + + def does_op_type_match(self, node, pattern, previously_matched_nodes, match): + if node.name in previously_matched_nodes: + return False + + pattern_matched = False + if pattern.op == '*': + pattern_matched = True + else: + for pattern_op in pattern.op.split('|'): + if node.op_type == pattern_op: + pattern_matched = True + if not pattern_matched: + return False + + match.node = node + if not pattern.inputs: + return True + if len(node.input_nodes) != len(pattern.inputs): + return False + + for i in range(len(pattern.inputs)): + input_node = self._node_map[node.input_nodes[i].name] + input_pattern = pattern.inputs[i] + input_match = NodeMatch() + match.inputs.append(input_match) + + if not self.does_op_type_match(input_node, input_pattern, previously_matched_nodes, input_match): + return False + + return True diff --git a/dlk/python/dlk/core/operators.py b/dlk/python/dlk/core/operators.py index 4f47d6187..09c5d39d0 100644 --- a/dlk/python/dlk/core/operators.py +++ b/dlk/python/dlk/core/operators.py @@ -56,6 +56,8 @@ def __init__(self, self._check_consistency() self._rank = len(shape) self._available_buffer = '' + self._visited = False + self._prop_details = Dict def __update_shape(self, shape: List[int], dimension_format: str) -> None: self._shape: List[int] = shape @@ -170,6 +172,19 @@ def input_names(cls) -> List[str]: """ return cls._input_names + @property + def input_nodes(self) -> List['Operator']: + """Return a list of input operators in proper order (original protobuf argument order). 
+ + Returns + ------- + ops : List of operators + This list is already ordered following the order of the arguments in the original + protobuf operators (positional order in the list of arguments). + + """ + return [self._input_ops[i] for i in self.input_names if self.input_ops.get(i)] + @property def output_ops(self) -> OutOps: """Return a dict of output operators. @@ -545,6 +560,22 @@ def infer_shape(cls, lists: Dict[str, List[int]], format: str, input_formats: Li """ raise NotImplementedError(f'operator {cls.__name__} cannot infer its shape.') + @property + def visited(self) -> bool: + return self._visited + + @visited.setter + def visited(self, v: Bool) -> None: + self._visited = v + + @property + def run_it_will_lose_information(self) -> bool: + return False + + @property + def preserve_quantization(self) -> bool: + return False + class Variable(Operator): """Variable class, which must be Input, Output or a constant.""" @@ -694,6 +725,10 @@ def infer_shape(cls, lists: Dict[str, List[int]], format: str, input_formats: Li attrs: Dict[str, Any]) -> List[int]: return lists['input'] + @property + def preserve_quantization(self) -> bool: + return True + class Quantizer(Operator): """Base class for quantizers.""" @@ -740,6 +775,11 @@ def binarizer(self, data: np.ndarray) -> np.ndarray: raise NotImplementedError( f'operator {self.op_type} need to implement the binarizer method') + @property + def run_it_will_lose_information(self) -> bool: + return True + + class QTZ_binary_mean_scaling(Quantizer): """Quantization operator using binary scaling. @@ -853,6 +893,10 @@ def infer_shape(cls, lists: Dict[str, List[int]], format: str, input_formats: Li attrs: Dict[str, Any]) -> List[int]: return lists['input'] + @property + def preserve_quantization(self) -> bool: + return True + class Transpose(Operator): """Transpose operator. @@ -915,6 +959,10 @@ def infer_shape(cls, lists: Dict[str, List[int]], format: str, input_formats: Li perm = attrs['perm'] return [lists['data'][i] for i in perm] + @property + def preserve_quantization(self) -> bool: + return True + class Conv(Operator): """Convolution operator. @@ -1241,6 +1289,10 @@ def infer_shape(cls, lists: Dict[str, List[int]], format: str, input_formats: Li NCHW = [N, C, H, W] return [NCHW[i] for i in [format.index(s) for s in 'NCHW']] + @property + def preserve_quantization(self) -> bool: + return True + class BatchNormalization(Operator): """Batch normalization operator. @@ -1812,6 +1864,10 @@ def run_forward(self) -> np.ndarray: def is_monotonic(self) -> bool: return False + @property + def preserve_quantization(self) -> bool: + return True + class Softmax(Operator): r"""Softmax operator. @@ -1957,6 +2013,10 @@ def infer_shape(cls, lists: Dict[str, List[int]], format: str, input_formats: Li def is_monotonic(self) -> bool: return False + @property + def preserve_quantization(self) -> bool: + return True + class Dropout(Operator): """Dropout operator. @@ -2313,6 +2373,10 @@ def _dispatch_name(self) -> str: def is_monotonic(self) -> bool: return False + @property + def preserve_quantization(self) -> bool: + return True + class Maximum(Operator): """Maximum operator. @@ -2411,6 +2475,10 @@ def infer_shape(cls, lists: Dict[str, List[int]], format: str, input_formats: Li attrs: Dict[str, Any]) -> List[int]: return lists['input'] + @property + def preserve_quantization(self) -> bool: + return True + class Split(Operator): """Split operator. 
@@ -2484,3 +2552,7 @@ def infer_shape(cls, lists: Dict[str, List[int]], format: str, input_formats: Li out_shape[ch_idx] = int(in_shape[ch_idx] / split) return out_shape + + @property + def preserve_quantization(self) -> bool: + return True diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index 247bc1798..12040efda 100644 --- a/dlk/python/dlk/core/optimizer.py +++ b/dlk/python/dlk/core/optimizer.py @@ -21,7 +21,7 @@ from core.operators import Add, AveragePool, BatchNormalization, Constant, Conv, Identity, Input, \ MaxPool, Operator, Output, Transpose, Quantizer, QTZ_binary_mean_scaling, QTZ_linear_mid_tread_half, \ Reshape, Softmax, Relu, Flatten, Dropout, Gemm, SpaceToDepth, QTZ_binary_channel_wise_mean_scaling, ConcatOnDepth,\ - Maximum, DepthToSpace, Split + Maximum, DepthToSpace, Split, Variable from typing import Any, Dict, List, Optional, Set, cast from functools import reduce @@ -642,4 +642,3 @@ def threshold_skipping(self, graph: Graph) -> Graph: kwargs: Dict[str, Any] = {} runner1.run(**kwargs) return graph - diff --git a/dlk/python/dlk/plugins/tf.py b/dlk/python/dlk/plugins/tf.py index f25e59d31..584fc1e6f 100644 --- a/dlk/python/dlk/plugins/tf.py +++ b/dlk/python/dlk/plugins/tf.py @@ -422,12 +422,12 @@ def convert_operator(self, op_type: str) -> str: return dlk_op_type if dlk_op_type else op_type def create_new_op(self, node: Any, op_dic: Dict[str, Operator], current_format: str, - input_format_list: List[str]) -> Operator: + input_format_list: List[str], nodes_to_remove) -> Operator: """Create new operators with Node, Input(Constant), Output.""" new_op: Operator if isinstance(node, Node): # operator nodes - new_op = self.create_new_node(node, op_dic, current_format, input_format_list) + new_op = self.create_new_node(node, op_dic, current_format, input_format_list, nodes_to_remove) else: # Input, Output or Constant shape: List[int] = list(map(int, node.get_shape())) @@ -471,7 +471,10 @@ def create_new_op(self, node: Any, op_dic: Dict[str, Operator], current_format: def add_all_nodes(self, graph: Graph) -> None: visited: Set[Any] = set() added: Dict[str, Operator] = {} - self.add_node_to_graph_recursive(self.out_lst[0], graph, visited, added, 'NHWC') + nodes_to_remove = [] + self.add_node_to_graph_recursive(self.out_lst[0], graph, visited, added, 'NHWC', nodes_to_remove) + for node in nodes_to_remove: + graph.remove_op(node) def _get_format(self, node: Any, output_format: str) -> Tuple[str, List[str]]: """Get the dimension format, like 'NCHW', 'HWCN', 'NHWC', etc.""" @@ -533,7 +536,7 @@ def _get_format(self, node: Any, output_format: str) -> Tuple[str, List[str]]: return output_format, [output_format] def add_node_to_graph_recursive(self, current: Any, graph: Graph, visited: Set[Any], added: Dict[str, Operator], - data_format: str) \ + data_format: str, nodes_to_remove) \ -> Operator: if current in visited: return added[current.name] @@ -544,10 +547,10 @@ def add_node_to_graph_recursive(self, current: Any, graph: Graph, visited: Set[A current_format, input_formats = self._get_format(current, data_format) inputs = self.find_inputs(current) for in_put, in_format in zip(inputs, input_formats): - in_op = self.add_node_to_graph_recursive(in_put, graph, visited, added, in_format) + in_op = self.add_node_to_graph_recursive(in_put, graph, visited, added, in_format, nodes_to_remove) added_op_dic[in_op.name] = in_op - op = self.create_new_op(current, added_op_dic, current_format, input_formats) + op = self.create_new_op(current, added_op_dic, current_format, 
input_formats, nodes_to_remove) graph.add_op(op) @@ -577,7 +580,7 @@ def find_inputs(self, node: Any) -> List[Any]: return inputs def create_new_node(self, node: Node, op_dic: Dict[str, Operator], current_format: str, - input_format_list: List[str]) -> Operator: + input_format_list: List[str], nodes_to_remove) -> Operator: """Create a new operator node. This might be tooooo long code... Parameters @@ -617,11 +620,13 @@ def create_new_node(self, node: Node, op_dic: Dict[str, Operator], current_forma def get_inputs(cdef: Type[Operator], current_node: Any) -> Dict[str, Operator]: input_names = cdef.input_names in_ops: Dict[str, Operator] = {} + in_ops_order: List[int] = [] for n, op in zip(input_names, current_node.inputs): in_ops[n] = op_dic[op] - return in_ops + in_ops_order.append(n) + return in_ops, in_ops_order - input_ops = get_inputs(class_def, node) + input_ops, input_ops_order = get_inputs(class_def, node) # Here find the shape and data type for the op def infer_shape(attrs: Dict[str, Any]) -> List[int]: @@ -955,7 +960,7 @@ def infer_dtype() -> DataType: dimension_format=current_format, ) elif op_type == 'ConcatOnDepth': - axis = node.attribute('axis') + axis = input_ops[input_ops_order[-1]] if current_format.index('C') != axis: ValueError('f{op_type} {node.name} concatenation is only supported on the depth axis') @@ -970,6 +975,10 @@ def infer_dtype() -> DataType: input_ops, dimension_format=current_format, ) + + input_axis_name = input_ops_order[-1] + nodes_to_remove.append(new_op.input_ops[input_axis_name]) + new_op.remove_input(input_axis_name) elif op_type == 'Maximum': if not shape: diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index 95207e576..0b82bce8b 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -30,6 +30,8 @@ from core.optimizer import Optimizer from code_generater import CodeGenerater from frontend import TensorFlowIO +from core.graph_pattern_matching import GraphMatcher, Pattern +from core.operators import Constant import utils @@ -38,6 +40,214 @@ ROOT_DIR = path.abspath(path.join(SCRITPS_DIR, '../../..')) +def pass_print(graph: Graph, name=str()): + + gm = GraphMatcher(graph) + + print('--- ', name, '---') + matches = list() + p = Pattern("*") + gm.get_op_type_matches(p, matches) + + for m in matches: + print('Match: ', m.node.name, m.node.op_type, m.node.dimension) + for input_node in m.node.input_nodes: + print(' -> ', input_node.name, input_node.op_type) + + print('---') + + +def pass_dot_graph(graph: Graph, filename): + + dot_script = 'digraph {' + + code = {} + counter = 0 + for node in graph.operators: + code[node.name] = counter + counter += 1 + + for node in graph.operators: + for input_node in node.input_nodes: + quant = node.quantizer.name if node.op_type == 'Conv' and node.quantizer else 'None' + aquant = node.a_quantizer[0].name if node.op_type == 'Conv' and node.a_quantizer else 'None' + + dot_script += '"' + format(code[input_node.name], '04X') + '-' + input_node.op_type + '"' + ' -> ' \ + + '"' + format(code[node.name], '04X') + '-' + node.op_type + '-' + aquant + '/' + quant + '"' + ';' + + dot_script += '}' + + with open(filename, 'w') as f: + f.write(dot_script) + + +def pass_remove_identities(graph: Graph): + + gm = GraphMatcher(graph) + + to_be_removed = list() + matches = list() + p = Pattern("Identity") + gm.get_op_type_matches(p, matches) + + for m in matches: + # print('Match: ', m.node.name, m.node.op_type) + # for input_node in 
m.node.input_nodes: + # print(' -> ', input_node.name, input_node.op_type) + + """skip all identity.""" + in_op = m.node.input_ops['input'] + out_ops = m.node.output_ops['output'] + for out_op in out_ops: + for k, v in out_op.input_ops.items(): + if v == m.node: + # change the output's input to this identity's input + out_op.add_input(k, in_op) + # change the input's output to this identity's output + for k2, v2 in in_op.output_ops.items(): + if m.node in v2: + v2.remove(m.node) + v2.append(out_op) + break + break + + to_be_removed.append(m.node) + + for op in to_be_removed: + graph.remove_op(op) + + +def pass_transpose(graph): + + gm = GraphMatcher(graph) + + matches = list() + p = Pattern("*") + gm.get_op_type_matches(p, matches) + + for m in matches: + # print('Match: ', m.node.name, m.node.op_type) + # for input_node in m.node.input_nodes: + # print(' -> ', input_node.name, input_node.op_type) + + dim = m.node.dimension + shape = m.node.shape + if len(shape) != 4 or len(dim) != 4 or not set(dim).issubset({'N', 'H', 'W', 'C', 'I', 'O'}): + continue + + dim = dim.replace('I', 'C') + dim = dim.replace('O', 'N') + + permutation = list(map(lambda s: dim.index(s), 'NHWC')) + m.node.transpose(permutation) + + +def pass_precompute(graph) -> int: + + gm = GraphMatcher(graph) + + to_be_removed = list() + matches = list() + p = Pattern("*") + gm.get_op_type_matches(p, matches) + + for m in matches: + + # We want operators with inputs + if not m.node.input_nodes: + continue + + # Leave out nodes which execution will lose information. + # They will have a special processing later. + if m.node.run_it_will_lose_information: + continue + + precomputable = True + for input_node in m.node.input_nodes: + if input_node.op_type != 'Constant': + precomputable = False + + if not precomputable: + continue + + to_be_removed += m.node.input_nodes + to_be_removed.append(m.node) + + m.node.run_forward() + + new_constant = Constant( + m.node.name + '_new', + m.node.dtype, + m.node.data, + dimension_format=m.node.dimension + ) + + graph.add_op(new_constant) + + new_constant.add_outputs(m.node.output_ops) + for output_name, consumer_list in m.node.output_ops.items(): + for consumer_node in consumer_list: + for input_name, input_node in consumer_node.input_ops.items(): + if input_node == m.node: + consumer_node.add_input(input_name, new_constant) + break + + for op in to_be_removed: + graph.remove_op(op) + + return len(to_be_removed) + + +def pass_propagate_quantization_details_into_conv(graph): + + gm = GraphMatcher(graph) + + matches = list() + p = Pattern('*') + gm.get_op_type_matches(p, matches) + + quantization_types = [ + 'QTZ_binary_mean_scaling', + 'QTZ_linear_mid_tread_half', + 'QTZ_binary_channel_wise_mean_scaling' + ] + + quantization_details = {} + for m in matches: + if not m.node.preserve_quantization: + quantization_details[m.node.name] = None + continue + + current_node_quant_details = [] + for input_node in m.node.input_nodes: + if input_node.op_type in quantization_types: + current_node_quant_details.append(input_node) + else: + current_node_quant_details.append(quantization_details[input_node.name]) + + if m.node.op_type == 'Conv': + m.node.a_quantizer = [current_node_quant_details[0]] if current_node_quant_details[0] else [] + m.node.quantizer = current_node_quant_details[1] + quantization_details[m.node.name] = None + else: + all_quantizers = True + for quantizer in current_node_quant_details: + if not quantizer: + all_quantizers = False + break + + if not all_quantizers: + same_nbits = False + 
else: + same_nbits = all(quantizer.nbit == current_node_quant_details[0].nbit + for quantizer in current_node_quant_details) + + quantization_details[m.node.name] = current_node_quant_details[0] if same_nbits else None + + if not same_nbits: + print(f'Warning: Not every input node of {m.node.name} is quantized to the same bit-width') + + def optimize_graph_step(model: Model, config: Config) -> None: """Optimze graph in the model. @@ -51,6 +261,26 @@ def optimize_graph_step(model: Model, config: Config) -> None: """ graph: Graph = model.graph + + pass_print(graph, 'Before') + pass_dot_graph(graph, '/tmp/original.dot') + + pass_remove_identities(graph) + pass_print(graph, 'After identity') + pass_dot_graph(graph, '/tmp/prune_identities.dot') + + pass_transpose(graph) + pass_print(graph, 'After transpose') + pass_dot_graph(graph, '/tmp/transposed.dot') + + pass_precompute(graph) + pass_print(graph, 'After precompute') + + pass_propagate_quantization_details_into_conv(graph) + pass_print(graph, 'After propagate') + + pass_dot_graph(graph, '/tmp/final.dot') + optim = Optimizer() optim.transpose_NHWC(graph) optim.precompute(graph, config.activate_hard_quantization) diff --git a/dlk/python/dlk/templates/Makefile.tpl b/dlk/python/dlk/templates/Makefile.tpl index 14d5a2b8b..b8c417f0d 100644 --- a/dlk/python/dlk/templates/Makefile.tpl +++ b/dlk/python/dlk/templates/Makefile.tpl @@ -134,17 +134,17 @@ clean: -$(RM) $(OBJ) lm_x86: CXX = g++ -lm_x86: FLAGS += $(INCLUDES) -O3 -std=c++0x -g -DUSE_PNG -pthread -g +lm_x86: FLAGS += $(INCLUDES) -O3 -std=c++0x -g -DUSE_PNG -pthread -g -DFUNC_TIME_MEASUREMENT lm_aarch64: CXX = aarch64-linux-gnu-g++ -lm_aarch64: FLAGS += $(INCLUDES) -O3 -std=c++0x -g -DUSE_NEON -DUSE_PNG -pthread -g +lm_aarch64: FLAGS += $(INCLUDES) -O3 -std=c++0x -g -DUSE_NEON -DUSE_PNG -pthread -g -DFUNC_TIME_MEASUREMENT lm_arm: CXX = arm-linux-gnueabihf-g++ -lm_arm: FLAGS += $(INCLUDES) -std=c++0x -O3 -DUSE_NEON -DUSE_PNG -mcpu=cortex-a9 -mfpu=neon -mthumb -s -pthread -g -fopenmp +lm_arm: FLAGS += $(INCLUDES) -std=c++0x -O3 -DUSE_NEON -DUSE_PNG -mcpu=cortex-a9 -mfpu=neon -mthumb -s -pthread -g -fopenmp -DFUNC_TIME_MEASUREMENT lm_arm: CXXFLAGS += lm_fpga: CXX = arm-linux-gnueabihf-g++ -lm_fpga: FLAGS += $(INCLUDES) -std=c++0x -O3 -DUSE_NEON -DRUN_ON_FPGA -DUSE_PNG -mcpu=cortex-a9 -mfpu=neon -mthumb -s -pthread -g -fopenmp +lm_fpga: FLAGS += $(INCLUDES) -std=c++0x -O3 -DUSE_NEON -DRUN_ON_FPGA -DUSE_PNG -mcpu=cortex-a9 -mfpu=neon -mthumb -s -pthread -g -fopenmp -DFUNC_TIME_MEASUREMENT lm_fpga: CXXFLAGS += lib_x86: CXX = g++ From 7376a1245b904999d06edb2ed993c271e3f964d8 Mon Sep 17 00:00:00 2001 From: Antonio Date: Thu, 29 Nov 2018 15:16:17 +0900 Subject: [PATCH 02/45] Fix: not necessary, axis is attribute in DLK IR --- dlk/python/dlk/core/view.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dlk/python/dlk/core/view.py b/dlk/python/dlk/core/view.py index 38fb28319..d711576a0 100644 --- a/dlk/python/dlk/core/view.py +++ b/dlk/python/dlk/core/view.py @@ -655,7 +655,7 @@ def run(self): input_list_name = op.name + '_inputs' depth_list_name = op.name + '_inputs_depth' - number_of_inputs = len(input_ops) - 1 + number_of_inputs = len(input_ops) concat_input = {} for k, v in input_ops.items(): if not v.is_variable: From 8a269d5858a86081dfeac5be6de9216fe1174e5a Mon Sep 17 00:00:00 2001 From: Antonio Date: Wed, 5 Dec 2018 18:44:19 +0900 Subject: [PATCH 03/45] Added a pass for computing thresholds based on binary search --- dlk/python/dlk/core/operators.py | 3 + 
dlk/python/dlk/scripts/generate_project.py | 100 ++++++++++++++++++++- 2 files changed, 99 insertions(+), 4 deletions(-) diff --git a/dlk/python/dlk/core/operators.py b/dlk/python/dlk/core/operators.py index 09c5d39d0..9ceabb4b5 100644 --- a/dlk/python/dlk/core/operators.py +++ b/dlk/python/dlk/core/operators.py @@ -653,6 +653,9 @@ def __init__(self, self._packed = packed super().__init__(name, shape, dtype, {}, data, dimension_format=dimension_format) + def run_forward(self) -> np.ndarray: + return self._data + @property def is_packed(self) -> bool: return self._packed diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index 0b82bce8b..57bfb4df2 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -22,6 +22,7 @@ import click from os import path import shutil +import numpy as np from core.config import Config from core.graph import Graph @@ -30,9 +31,10 @@ from core.optimizer import Optimizer from code_generater import CodeGenerater from frontend import TensorFlowIO -from core.graph_pattern_matching import GraphMatcher, Pattern +from core.graph_pattern_matching import GraphMatcher, Pattern, match_to_execution_list from core.operators import Constant +from collections import defaultdict import utils SCRITPS_DIR = path.abspath(path.dirname(__file__)) @@ -69,11 +71,9 @@ def pass_dot_graph(graph: Graph, filename): for node in graph.operators: for input_node in node.input_nodes: - quant = node.quantizer.name if node.op_type == 'Conv' and node.quantizer else 'None' - aquant = node.a_quantizer[0].name if node.op_type == 'Conv' and node.a_quantizer else 'None' dot_script += '"' + format(code[input_node.name], '04X') + '-' + input_node.op_type + '"' + ' -> ' \ - + '"' + format(code[node.name], '04X') + '-' + node.op_type + '-' + aquant + '/' + quant + '"' + ';' + + '"' + format(code[node.name], '04X') + '-' + node.op_type + '"' + ';' dot_script += '}' @@ -248,6 +248,95 @@ def pass_propagate_quantization_details_into_conv(graph): print(f'Warning: Not every input node of {m.node.name} is quantized to the same bit-width') +def pass_compute_thresholds(graph): + + gm = GraphMatcher(graph) + + quantization_types_pattern = \ + 'QTZ_linear_mid_tread_half' + + matches = list() + p = Pattern(quantization_types_pattern, + [ + Pattern('BatchNormalization', + [ + Pattern('Conv'), + Pattern('*'), + Pattern('*'), + Pattern('*'), + Pattern('*') + ]), + Pattern('*'), + Pattern('*'), + ]) + + gm.get_op_type_matches(p, matches) + + for m in matches: + + quantizer_conv_output_node = m.node + batch_norm_node = quantizer_conv_output_node.input_nodes[0] + conv_node = batch_norm_node.input_nodes[0] + + # check if this is a quantized convolution + if not conv_node.quantizer or not conv_node.a_quantizer: + continue + + quantizer_conv_weights = conv_node.quantizer + quantizer_conv_weights.run_forward() + scaling_factor = quantizer_conv_weights.scaling_factor + + match_execution_list = list() + match_to_execution_list(m, match_execution_list) + + ths = defaultdict(list) + computed_quantized_results = defaultdict(set) + magic_number = 2 + + # TODO: make '3' function on the number of bits of the number of bits + for value in range(0, 3): + for idx in range(scaling_factor.size): + + # assume that the output value will be a 16-bit signed integer + low = -(2**15) + high = 2**15 - 1 + + # binary search + while low <= high: + mid = low + (high - low) // 2 + input_data = (scaling_factor * mid) * 2.0 / 3.0 # TODO: get from 
quantizers (n_bits, max_value) + data_dict = batch_norm_node.run(data=input_data) + data_dict = quantizer_conv_output_node.run(data=data_dict['data']) + result = data_dict['data'][idx] + computed_quantized_results[idx].add(result) + + if result > value: + high = mid - 1 + else: + low = mid + 1 + + ths[idx].append(low) + + # check if increasing, decreasing or constant + for channel, values in computed_quantized_results.items(): + if len(values) == 1: + ths[channel].append(values.pop() + magic_number) + else: + first_threshold_result = values.pop() + second_threshold_result = values.pop() + + if first_threshold_result < second_threshold_result: + ths[channel].append(1) + else: + ths[channel].append(-1) + + # put everything into a list to be compatible with the rest of the code + ths_list = [] + for channel in sorted(ths.keys()): + ths_list += ths[channel] + conv_node.thresholds = ths_list + + def optimize_graph_step(model: Model, config: Config) -> None: """Optimze graph in the model. @@ -273,12 +362,15 @@ def optimize_graph_step(model: Model, config: Config) -> None: pass_print(graph, 'After transpose') pass_dot_graph(graph, '/tmp/transposed.dot') + # TODO: call until pass_precompute returns 0 pass_precompute(graph) pass_print(graph, 'After precompute') pass_propagate_quantization_details_into_conv(graph) pass_print(graph, 'After propagate') + pass_compute_thresholds(graph) + pass_dot_graph(graph, '/tmp/final.dot') optim = Optimizer() From ebe9d3ad2d9b46b772a41fd0fd0cac8713974275 Mon Sep 17 00:00:00 2001 From: Antonio Date: Fri, 7 Dec 2018 14:28:34 +0900 Subject: [PATCH 04/45] [WIP] Visualization, weight packing and small fixes. Start to comment out old code --- dlk/python/dlk/core/operators.py | 7 - dlk/python/dlk/core/optimizer.py | 147 +++++++++++---------- dlk/python/dlk/scripts/generate_project.py | 140 +++++++++++++++----- 3 files changed, 178 insertions(+), 116 deletions(-) diff --git a/dlk/python/dlk/core/operators.py b/dlk/python/dlk/core/operators.py index 9ceabb4b5..680591650 100644 --- a/dlk/python/dlk/core/operators.py +++ b/dlk/python/dlk/core/operators.py @@ -568,10 +568,6 @@ def visited(self) -> bool: def visited(self, v: Bool) -> None: self._visited = v - @property - def run_it_will_lose_information(self) -> bool: - return False - @property def preserve_quantization(self) -> bool: return False @@ -778,9 +774,6 @@ def binarizer(self, data: np.ndarray) -> np.ndarray: raise NotImplementedError( f'operator {self.op_type} need to implement the binarizer method') - @property - def run_it_will_lose_information(self) -> bool: - return True diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index 12040efda..27e03939a 100644 --- a/dlk/python/dlk/core/optimizer.py +++ b/dlk/python/dlk/core/optimizer.py @@ -485,87 +485,88 @@ def _apply_threshold_skipping(self, op_lst: List[Operator]) -> None: if start is not None and finish is not None: - def linear_qtz2float(x: np.ndarray, n_value: int, max_value: float) -> np.ndarray: - real_x = x / np.float64(n_value) * np.float64(max_value) - return real_x.astype(np.float64) + # def linear_qtz2float(x: np.ndarray, n_value: int, max_value: float) -> np.ndarray: + # real_x = x / np.float64(n_value) * np.float64(max_value) + # return real_x.astype(np.float64) # Step 1: Compute thresholds for Convolution operators - aqtzer = cast(Quantizer, start.a_quantizer[0]) # Activation Quantizers should all have the same bits - bit = aqtzer.nbit - max_v = aqtzer.max_v - if bit is None or max_v is None: - ValueError(f'activation 
quantizer of node {start.name} has bit or max value of None') - - n = 2 ** bit - 1 - ch = start.channel - lch = start.input_ops['X'].channel - k = start.kernel_height * start.kernel_width * lch * n - qtzer = cast(Quantizer, start.quantizer) - conv_results = [x for x in range(-k, k + 1, 1)] - th_tmp = np.empty([ch, n + 1], dtype=np.int32) - v_now = dict.fromkeys([x for x in range(ch)], 0) - th_now = 0 - val_neg_flag = -1 - val_pos_flag = 1 - all_transdata: Dict[int, Dict[str, Any]] = {} + # aqtzer = cast(Quantizer, start.a_quantizer[0]) # Activation Quantizers should all have the same bits + # bit = aqtzer.nbit + # max_v = aqtzer.max_v + # if bit is None or max_v is None: + # ValueError(f'activation quantizer of node {start.name} has bit or max value of None') + + # n = 2 ** bit - 1 + # ch = start.channel + # lch = start.input_ops['X'].channel + # k = start.kernel_height * start.kernel_width * lch * n + # qtzer = cast(Quantizer, start.quantizer) + # conv_results = [x for x in range(-k, k + 1, 1)] + # th_tmp = np.empty([ch, n + 1], dtype=np.int32) + # v_now = dict.fromkeys([x for x in range(ch)], 0) + # th_now = 0 + # val_neg_flag = -1 + # val_pos_flag = 1 + # all_transdata: Dict[int, Dict[str, Any]] = {} # Step 1-1: initalize thresholds - for conv_res in conv_results: - conv_out = np.full(ch, conv_res, dtype=np.float64) - conv_out *= qtzer.scaling_factor if qtzer.scaling_factor is not None \ - else ValueError(f'oops Quantizer of node {start.name} has scaling factor of None') - - conv_data = linear_qtz2float(conv_out, n, max_v) - - trans_data: Dict[str, Any] = {'data': conv_data} - for idx, op in sorted(transitions.items(), reverse=True): - trans_data = op.run(**trans_data) - - for depth in range(ch): - init = -k if depth in trans_data['nega_idx'] else k - th_tmp[depth, :] = init - - all_transdata[conv_res] = trans_data + # for conv_res in conv_results: + # conv_out = np.full(ch, conv_res, dtype=np.float64) + # conv_out *= qtzer.scaling_factor if qtzer.scaling_factor is not None \ + # else ValueError(f'oops Quantizer of node {start.name} has scaling factor of None') + # + # conv_data = linear_qtz2float(conv_out, n, max_v) + # + # trans_data: Dict[str, Any] = {'data': conv_data} + # for idx, op in sorted(transitions.items(), reverse=True): + # trans_data = op.run(**trans_data) + # + # for depth in range(ch): + # init = -k if depth in trans_data['nega_idx'] else k + # th_tmp[depth, :] = init + # + # all_transdata[conv_res] = trans_data # Step 1-2: update thresholds - for conv_res in conv_results: - trans_data = all_transdata[conv_res] - qtz_out = trans_data['data'] - qtz_mu = np.mean(qtz_out) - if qtz_mu != th_now: - for depth in range(ch): - is_negative = depth in trans_data['nega_idx'] - if v_now.get(depth) != qtz_out[depth]: - if is_negative: - th_tmp[depth, abs(n - qtz_out[depth] - 1)] = conv_res - else: - th_tmp[depth, qtz_out[depth] - 1] = conv_res - v_now[depth] = qtz_out[depth] - th_tmp[depth, n] = -1 if is_negative else 1 - for depth in range(ch): - constant = reduce(lambda x, y: x and y, - [th_tmp[depth, i] == th_tmp[depth, i + 1] for i in range(n - 1)]) - th_tmp[depth, n] = qtz_out[depth] + 2 if constant else th_tmp[depth, n] - # note: 2 above is a magic number. the result value must not be 1 nor -1. 
- th_now = qtz_mu - - start.thresholds = th_tmp.flatten().tolist() + # for conv_res in conv_results: + # trans_data = all_transdata[conv_res] + # qtz_out = trans_data['data'] + # qtz_mu = np.mean(qtz_out) + # if qtz_mu != th_now: + # for depth in range(ch): + # is_negative = depth in trans_data['nega_idx'] + # if v_now.get(depth) != qtz_out[depth]: + # if is_negative: + # th_tmp[depth, abs(n - qtz_out[depth] - 1)] = conv_res + # else: + # th_tmp[depth, qtz_out[depth] - 1] = conv_res + # v_now[depth] = qtz_out[depth] + # th_tmp[depth, n] = -1 if is_negative else 1 + # for depth in range(ch): + # constant = reduce(lambda x, y: x and y, + # [th_tmp[depth, i] == th_tmp[depth, i + 1] for i in range(n - 1)]) + # th_tmp[depth, n] = qtz_out[depth] + 2 if constant else th_tmp[depth, n] + # # note: 2 above is a magic number. the result value must not be 1 nor -1. + # th_now = qtz_mu + + # start.thresholds = th_tmp.flatten().tolist() # Step 2: Skipping unused operators, e.g. batch normalization, linear activation quantizer - if start.has_thresholds: - if start.dtype is not finish.dtype: - start.dtype = finish.dtype - for consumers in finish.output_ops.values(): - for consumer in consumers: - for idex, y in start.output_ops.items(): - if not bool(set(consumers) & set(y)): - start.remove_output(idex) - start.add_output(idex, consumer) - - for indent, v in consumer.input_ops.items(): - if v == finish: - consumer.add_input(indent, start) - break + pass + # if start.has_thresholds: + # if start.dtype is not finish.dtype: + # start.dtype = finish.dtype + # for consumers in finish.output_ops.values(): + # for consumer in consumers: + # for idex, y in start.output_ops.items(): + # if not bool(set(consumers) & set(y)): + # start.remove_output(idex) + # start.add_output(idex, consumer) + # + # for indent, v in consumer.input_ops.items(): + # if v == finish: + # consumer.add_input(indent, start) + # break else: pass diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index 57bfb4df2..606871625 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -33,7 +33,9 @@ from frontend import TensorFlowIO from core.graph_pattern_matching import GraphMatcher, Pattern, match_to_execution_list from core.operators import Constant - +from modules.packer import Packer +from core.data_types import Uint32, QUANTIZED_NOT_PACKED +from typing import cast from collections import defaultdict import utils @@ -69,11 +71,22 @@ def pass_dot_graph(graph: Graph, filename): code[node.name] = counter counter += 1 + # for node in graph.operators: + # for input_node in node.input_nodes: + # + # dot_script += '"' + format(code[input_node.name], '04X') + '-' + input_node.op_type + '"' + ' -> ' \ + # + '"' + format(code[node.name], '04X') + '-' + node.op_type + '"' + ';' + for node in graph.operators: - for input_node in node.input_nodes: - dot_script += '"' + format(code[input_node.name], '04X') + '-' + input_node.op_type + '"' + ' -> ' \ - + '"' + format(code[node.name], '04X') + '-' + node.op_type + '"' + ';' + shape = '-' + if node.shape: + shape = 'x'.join(str(x) for x in node.shape) + shape += '(' + node.dimension + ')' + + dot_script += node.name + '[label=" ' + format(code[node.name], '04X') + '| ' + node.op_type + '| ' + shape + '| ' + node.dtype.cpptype() + '" shape = "record"];' + for i in node.input_nodes: + dot_script += i.name + ' -> ' + node.name + ';' dot_script += '}' @@ -91,10 +104,6 @@ def pass_remove_identities(graph: Graph): 
gm.get_op_type_matches(p, matches) for m in matches: - # print('Match: ', m.node.name, m.node.op_type) - # for input_node in m.node.input_nodes: - # print(' -> ', input_node.name, input_node.op_type) - """skip all identity.""" in_op = m.node.input_ops['input'] out_ops = m.node.output_ops['output'] @@ -126,10 +135,6 @@ def pass_transpose(graph): gm.get_op_type_matches(p, matches) for m in matches: - # print('Match: ', m.node.name, m.node.op_type) - # for input_node in m.node.input_nodes: - # print(' -> ', input_node.name, input_node.op_type) - dim = m.node.dimension shape = m.node.shape if len(shape) != 4 or len(dim) != 4 or not set(dim).issubset({'N', 'H', 'W', 'C', 'I', 'O'}): @@ -142,26 +147,24 @@ def pass_transpose(graph): m.node.transpose(permutation) -def pass_precompute(graph) -> int: +def pass_precompute(graph, processed_nodes): gm = GraphMatcher(graph) - to_be_removed = list() matches = list() - p = Pattern("*") + p = Pattern('*') gm.get_op_type_matches(p, matches) + processed_before_precompute = len(processed_nodes) + for m in matches: + if m.node in processed_nodes: + continue # We want operators with inputs if not m.node.input_nodes: continue - # Leave out nodes which execution will lose information. - # They will have a special processing later. - if m.node.run_it_will_lose_information: - continue - precomputable = True for input_node in m.node.input_nodes: if input_node.op_type != 'Constant': @@ -170,8 +173,8 @@ def pass_precompute(graph) -> int: if not precomputable: continue - to_be_removed += m.node.input_nodes - to_be_removed.append(m.node) + processed_nodes += m.node.input_nodes + processed_nodes.append(m.node) m.node.run_forward() @@ -192,10 +195,7 @@ def pass_precompute(graph) -> int: consumer_node.add_input(input_name, new_constant) break - for op in to_be_removed: - graph.remove_op(op) - - return len(to_be_removed) + return len(processed_nodes) > processed_before_precompute def pass_propagate_quantization_details_into_conv(graph): @@ -286,9 +286,6 @@ def pass_compute_thresholds(graph): quantizer_conv_weights.run_forward() scaling_factor = quantizer_conv_weights.scaling_factor - match_execution_list = list() - match_to_execution_list(m, match_execution_list) - ths = defaultdict(list) computed_quantized_results = defaultdict(set) magic_number = 2 @@ -298,8 +295,9 @@ def pass_compute_thresholds(graph): for idx in range(scaling_factor.size): # assume that the output value will be a 16-bit signed integer - low = -(2**15) - high = 2**15 - 1 + n = 2**15 + low = -n + 1 + high = n - 2 # binary search while low <= high: @@ -336,6 +334,72 @@ def pass_compute_thresholds(graph): ths_list += ths[channel] conv_node.thresholds = ths_list + # Disconnect batchnorm and the quantizer + out_ops = quantizer_conv_output_node.output_ops['output'] + for output_node in out_ops: + for input_name, input_node in output_node.input_ops.items(): + if input_node == quantizer_conv_output_node: + output_node.add_input(input_name, conv_node) + + conv_node.remove_output('Y') + conv_node.add_outputs({'Y': out_ops}) + + # TODO: temporary (only for drawing better graphs) + batch_norm_node.remove_input('X') + + +def pass_pack_weights(graph): + + gm = GraphMatcher(graph) + + quantization_types = [ + 'QTZ_binary_mean_scaling', + 'QTZ_linear_mid_tread_half', + 'QTZ_binary_channel_wise_mean_scaling' + ] + + matches = list() + p = Pattern('Conv') + + gm.get_op_type_matches(p, matches) + + # TODO: pass proper parameters + packer = Packer(1, 32) + + for m in matches: + conv_node = m.node + + # check if this is a 
quantized convolution + if not conv_node.quantizer or not conv_node.a_quantizer: + continue + + weight_quantizer = conv_node.quantizer + if weight_quantizer.op_type not in quantization_types: + continue + + # Quantize the weights + weight_quantizer.run_forward() + op_data = weight_quantizer.binarizer(weight_quantizer.data) + data = packer.run(op_data.astype(np.float32), weight_quantizer.dimension) + + quantized_constant = Constant( + weight_quantizer.name + '_new', + Uint32(), + data, + packed=True, + actual_shape=weight_quantizer.shape + ) + + graph.add_op(quantized_constant) + + quantized_constant.add_outputs(weight_quantizer.output_ops) + for output_name, consumer_list in weight_quantizer.output_ops.items(): + for consumer_node in consumer_list: + for input_name, input_node in consumer_node.input_ops.items(): + if input_node == weight_quantizer: + consumer_node.add_input(input_name, quantized_constant) + break + def optimize_graph_step(model: Model, config: Config) -> None: """Optimze graph in the model. @@ -362,24 +426,28 @@ def optimize_graph_step(model: Model, config: Config) -> None: pass_print(graph, 'After transpose') pass_dot_graph(graph, '/tmp/transposed.dot') - # TODO: call until pass_precompute returns 0 - pass_precompute(graph) - pass_print(graph, 'After precompute') - pass_propagate_quantization_details_into_conv(graph) pass_print(graph, 'After propagate') pass_compute_thresholds(graph) + pass_pack_weights(graph) + + # processed_nodes = [] + # while pass_precompute(graph, processed_nodes=processed_nodes): + # pass + # pass_print(graph, 'After precompute') pass_dot_graph(graph, '/tmp/final.dot') optim = Optimizer() - optim.transpose_NHWC(graph) + # optim.transpose_NHWC(graph) optim.precompute(graph, config.activate_hard_quantization) if config.threshold_skipping: optim.threshold_skipping(graph) + + def generate_code_step(model: Model, config: Config) -> None: """Generate code for the model. From 180d156581dd40c15efbc1690421b57f6bfac7bf Mon Sep 17 00:00:00 2001 From: Antonio Date: Sat, 8 Dec 2018 12:33:15 +0900 Subject: [PATCH 05/45] [WIP] Add extra passes and small bugfixes. 
Now doesnt depend on old optimizer code --- dlk/python/dlk/core/operators.py | 19 ++++ dlk/python/dlk/scripts/generate_project.py | 114 +++++++++++++-------- 2 files changed, 92 insertions(+), 41 deletions(-) diff --git a/dlk/python/dlk/core/operators.py b/dlk/python/dlk/core/operators.py index 680591650..369860de2 100644 --- a/dlk/python/dlk/core/operators.py +++ b/dlk/python/dlk/core/operators.py @@ -822,6 +822,14 @@ def run_forward(self) -> np.ndarray: in_data = self.input_ops['input'].data self._scaling_factor = np.mean(np.abs(in_data)) self._data = np.sign(in_data) + + return self._data * self._scaling_factor + + def run_forward_no_scaling_factor(self) -> np.ndarray: + in_data = self.input_ops['input'].data + self._scaling_factor = np.mean(np.abs(in_data)) + self._data = np.sign(in_data) + return self._data @classmethod @@ -2306,6 +2314,17 @@ def run_forward(self) -> np.ndarray: in_data = self.input_ops['input'].data self._scaling_factor = np.mean(np.abs(in_data), axis=(1, 2, 3)).astype(np.float32) self._data = np.sign(in_data) + + scaling = copy.deepcopy(self._scaling_factor) + extra_dims = tuple(np.ones((len(self._data.shape) - len(scaling.shape)), dtype=np.int32)) + scaling = scaling.reshape(scaling.shape + extra_dims) + + return scaling * self._data + + def run_forward_no_scaling_factor(self) -> np.ndarray: + in_data = self.input_ops['input'].data + self._scaling_factor = np.mean(np.abs(in_data), axis=(1, 2, 3)).astype(np.float32) + self._data = np.sign(in_data) return self._data def binarizer(self, data: np.ndarray) -> np.ndarray: diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index 606871625..960824834 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -176,12 +176,12 @@ def pass_precompute(graph, processed_nodes): processed_nodes += m.node.input_nodes processed_nodes.append(m.node) - m.node.run_forward() + data = m.node.run_forward() new_constant = Constant( m.node.name + '_new', m.node.dtype, - m.node.data, + data, dimension_format=m.node.dimension ) @@ -206,46 +206,37 @@ def pass_propagate_quantization_details_into_conv(graph): p = Pattern('*') gm.get_op_type_matches(p, matches) - quantization_types = [ + qtypes = [ 'QTZ_binary_mean_scaling', 'QTZ_linear_mid_tread_half', 'QTZ_binary_channel_wise_mean_scaling' ] - quantization_details = {} + quant_details = defaultdict(list) for m in matches: if not m.node.preserve_quantization: - quantization_details[m.node.name] = None + quant_details[m.node.name] = [] continue - current_node_quant_details = [] - for input_node in m.node.input_nodes: - if input_node.op_type in quantization_types: - current_node_quant_details.append(input_node) - else: - current_node_quant_details.append(quantization_details[input_node.name]) - if m.node.op_type == 'Conv': - m.node.a_quantizer = [current_node_quant_details[0]] if current_node_quant_details[0] else [] - m.node.quantizer = current_node_quant_details[1] - quantization_details[m.node.name] = None - else: - all_quantizers = True - for quantizer in current_node_quant_details: - if not quantizer: - all_quantizers = False - break + input_node = m.node.input_nodes[0] + weight_node = m.node.input_nodes[1] - if not all_quantizers: - same_nbits = False - else: - same_nbits = all(quantizer.nbit == current_node_quant_details[0].nbit - for quantizer in current_node_quant_details) + m.node.a_quantizer = [input_node] if input_node.op_type in qtypes else quant_details[input_node.name] + 
m.node.quantizer = weight_node if weight_node.op_type in qtypes else quant_details[weight_node.name] - quantization_details[m.node.name] = current_node_quant_details[0] if same_nbits else None + quant_details[m.node.name] = [] + else: + qtzs = [] + for n in m.node.input_nodes: + if n.op_type in qtypes: + qtzs.append(n) + else: + for q in quant_details[n.name]: + qtzs.append(q) - if not same_nbits: - print(f'Warning: Not every input node of {m.node.name} is quantized to the same bit-width') + quant_details[m.node.name] = qtzs if len(qtzs) == len(m.node.input_nodes) else [] + # TODO: check if the quantizers use same n_bits def pass_compute_thresholds(graph): @@ -283,7 +274,7 @@ def pass_compute_thresholds(graph): continue quantizer_conv_weights = conv_node.quantizer - quantizer_conv_weights.run_forward() + quantizer_conv_weights.run_forward_no_scaling_factor() scaling_factor = quantizer_conv_weights.scaling_factor ths = defaultdict(list) @@ -292,7 +283,7 @@ def pass_compute_thresholds(graph): # TODO: make '3' function on the number of bits of the number of bits for value in range(0, 3): - for idx in range(scaling_factor.size): + for idx in range(conv_node.channel): # assume that the output value will be a 16-bit signed integer n = 2**15 @@ -345,7 +336,7 @@ def pass_compute_thresholds(graph): conv_node.add_outputs({'Y': out_ops}) # TODO: temporary (only for drawing better graphs) - batch_norm_node.remove_input('X') + # batch_norm_node.remove_input('X') def pass_pack_weights(graph): @@ -401,6 +392,47 @@ def pass_pack_weights(graph): break +def pass_quantize_convolutions(graph): + + gm = GraphMatcher(graph) + + matches = list() + p = Pattern('Conv') + gm.get_op_type_matches(p, matches) + + for m in matches: + conv_node = m.node + + # check if this is a quantized convolution + if not conv_node.quantizer or not conv_node.a_quantizer: + continue + + # Mark as quantized convolution + conv_node.is_quantized = True + + # change the output data type of the convolution if thresholds are available + if conv_node.has_thresholds: + conv_node.dtype = QUANTIZED_NOT_PACKED + + # change the output data type of the quantizers + conv_node.quantizer.dtype = Uint32 + for qtz in conv_node.a_quantizer: + qtz.dtype = QUANTIZED_NOT_PACKED + + +def pass_propagate_datatypes(graph): + + gm = GraphMatcher(graph) + + matches = list() + p = Pattern('*') + gm.get_op_type_matches(p, matches) + + for m in matches: + if m.node.op_type != 'Conv' and m.node.preserve_quantization: + m.node.dtype = m.node.input_nodes[0].dtype + + def optimize_graph_step(model: Model, config: Config) -> None: """Optimze graph in the model. 
@@ -431,21 +463,21 @@ def optimize_graph_step(model: Model, config: Config) -> None: pass_compute_thresholds(graph) pass_pack_weights(graph) + pass_quantize_convolutions(graph) + pass_propagate_datatypes(graph) - # processed_nodes = [] - # while pass_precompute(graph, processed_nodes=processed_nodes): - # pass - # pass_print(graph, 'After precompute') + processed_nodes = [] + while pass_precompute(graph, processed_nodes=processed_nodes): + pass + pass_print(graph, 'After precompute') pass_dot_graph(graph, '/tmp/final.dot') optim = Optimizer() # optim.transpose_NHWC(graph) - optim.precompute(graph, config.activate_hard_quantization) - if config.threshold_skipping: - optim.threshold_skipping(graph) - - + # optim.precompute(graph, config.activate_hard_quantization) + # if config.threshold_skipping: + # optim.threshold_skipping(graph) def generate_code_step(model: Model, config: Config) -> None: From ba900c588ac71d415d0d4925ec1bfd40376e171a Mon Sep 17 00:00:00 2001 From: Antonio Date: Mon, 10 Dec 2018 12:17:39 +0900 Subject: [PATCH 06/45] Added base code so Neil-san can work on threshold skipping --- dlk/python/dlk/scripts/generate_project.py | 23 +++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index 960824834..7ff0401cd 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -32,7 +32,7 @@ from code_generater import CodeGenerater from frontend import TensorFlowIO from core.graph_pattern_matching import GraphMatcher, Pattern, match_to_execution_list -from core.operators import Constant +from core.operators import Constant, Operator from modules.packer import Packer from core.data_types import Uint32, QUANTIZED_NOT_PACKED from typing import cast @@ -265,10 +265,28 @@ def pass_compute_thresholds(graph): for m in matches: + # TODO: Neil-san, please use this to apply your threshold. 
'p' is the path from qtz to conv (both included) + # TODO: Neil-san, you can access to the quantizers thorugh 'conv.a_quantizer' and 'conv.quantizer' + # p = [m.node] + # while p[-1].op_type != 'Conv': + # non_variable_input = [inode for inode in p[-1].input_nodes + # if (not cast(Operator, inode).is_variable and inode.is_monotonic) + # or inode.op_type == 'Conv'] + # if len(non_variable_input) != 1: + # break + # p.append(non_variable_input[-1]) + # + # if p[-1].op_type != 'Conv': + # continue + # quantizer_conv_output_node = p[0] + # conv_node = p[-1] + + # TODO: Neil-san, you can delete this quantizer_conv_output_node = m.node batch_norm_node = quantizer_conv_output_node.input_nodes[0] conv_node = batch_norm_node.input_nodes[0] + # TODO: Neil-san, you should keep this # check if this is a quantized convolution if not conv_node.quantizer or not conv_node.a_quantizer: continue @@ -306,6 +324,7 @@ def pass_compute_thresholds(graph): ths[idx].append(low) + # TODO: Neil-san, you don't probably need this # check if increasing, decreasing or constant for channel, values in computed_quantized_results.items(): if len(values) == 1: @@ -319,12 +338,14 @@ def pass_compute_thresholds(graph): else: ths[channel].append(-1) + # TODO: Neil-san, you keep the things in a list already # put everything into a list to be compatible with the rest of the code ths_list = [] for channel in sorted(ths.keys()): ths_list += ths[channel] conv_node.thresholds = ths_list + # TODO: Neil-san, you should keep this # Disconnect batchnorm and the quantizer out_ops = quantizer_conv_output_node.output_ops['output'] for output_node in out_ops: From 58cbe844cb25042770df24c98cb8036f777e7cab Mon Sep 17 00:00:00 2001 From: nlpng Date: Mon, 10 Dec 2018 16:13:59 +0900 Subject: [PATCH 07/45] changes to the threshold skipping pass --- dlk/python/dlk/core/operators.py | 20 +++ dlk/python/dlk/scripts/generate_project.py | 134 ++++++++------------- 2 files changed, 73 insertions(+), 81 deletions(-) diff --git a/dlk/python/dlk/core/operators.py b/dlk/python/dlk/core/operators.py index 369860de2..082ea8a16 100644 --- a/dlk/python/dlk/core/operators.py +++ b/dlk/python/dlk/core/operators.py @@ -1377,6 +1377,17 @@ def run(self, **kwargs) -> Dict: kwargs['data'] = scale * x_norm + beta return kwargs + def de_run(self, **kwargs) -> Dict: + scale = np.float64(self._input_ops['scale'].data) + beta = np.float64(self._input_ops['B'].data) + mean = np.float64(self._input_ops['mean'].data) + var = np.float64(self._input_ops['var'].data) + + kwargs['nega_idx'] = [v for v in range(len(scale)) if scale[v] < 0] + + kwargs['data'] = (((kwargs['data'] - beta) / scale) * np.sqrt(var + self.epsilon)) + mean + return kwargs + def run_forward(self) -> np.ndarray: kwdata = {'data': self.input_ops['X'].data} data_dict = self.run(**kwdata) @@ -1445,6 +1456,15 @@ def run(self, **kwargs) -> Dict: kwargs['data'] = np.round(in_data * n / max_value).astype(np.int32) return kwargs + def de_run(self, **kwargs) -> Dict: + bit = self._input_ops['Y'].data + max_value = np.float64(self._input_ops['Z'].data) + in_data = np.float64(kwargs['data']) + + n = 2 ** bit - 1 + kwargs['data'] = (in_data * np.float64(max_value)) / np.float64(n) + return kwargs + def run_forward(self) -> np.ndarray: data_dict = self.run(data=self._input_ops['X'].data) self._data = data_dict['data'] diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index 7ff0401cd..e9f05d378 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ 
b/dlk/python/dlk/scripts/generate_project.py @@ -22,6 +22,7 @@ import click from os import path import shutil +import math import numpy as np from core.config import Config @@ -243,23 +244,8 @@ def pass_compute_thresholds(graph): gm = GraphMatcher(graph) - quantization_types_pattern = \ - 'QTZ_linear_mid_tread_half' - matches = list() - p = Pattern(quantization_types_pattern, - [ - Pattern('BatchNormalization', - [ - Pattern('Conv'), - Pattern('*'), - Pattern('*'), - Pattern('*'), - Pattern('*') - ]), - Pattern('*'), - Pattern('*'), - ]) + p = Pattern('QTZ_linear_mid_tread_half') gm.get_op_type_matches(p, matches) @@ -267,24 +253,19 @@ def pass_compute_thresholds(graph): # TODO: Neil-san, please use this to apply your threshold. 'p' is the path from qtz to conv (both included) # TODO: Neil-san, you can access to the quantizers thorugh 'conv.a_quantizer' and 'conv.quantizer' - # p = [m.node] - # while p[-1].op_type != 'Conv': - # non_variable_input = [inode for inode in p[-1].input_nodes - # if (not cast(Operator, inode).is_variable and inode.is_monotonic) - # or inode.op_type == 'Conv'] - # if len(non_variable_input) != 1: - # break - # p.append(non_variable_input[-1]) - # - # if p[-1].op_type != 'Conv': - # continue - # quantizer_conv_output_node = p[0] - # conv_node = p[-1] - - # TODO: Neil-san, you can delete this - quantizer_conv_output_node = m.node - batch_norm_node = quantizer_conv_output_node.input_nodes[0] - conv_node = batch_norm_node.input_nodes[0] + p = [m.node] + while p[-1].op_type != 'Conv': + non_variable_input = [inode for inode in p[-1].input_nodes + if (not cast(Operator, inode).is_variable and inode.is_monotonic) + or inode.op_type == 'Conv'] + if len(non_variable_input) != 1: + break + p.append(non_variable_input[-1]) + + if p[-1].op_type != 'Conv': + continue + quantizer_conv_output_node = p[0] + conv_node = p[-1] # TODO: Neil-san, you should keep this # check if this is a quantized convolution @@ -295,55 +276,46 @@ def pass_compute_thresholds(graph): quantizer_conv_weights.run_forward_no_scaling_factor() scaling_factor = quantizer_conv_weights.scaling_factor - ths = defaultdict(list) - computed_quantized_results = defaultdict(set) - magic_number = 2 - # TODO: make '3' function on the number of bits of the number of bits - for value in range(0, 3): - for idx in range(conv_node.channel): - - # assume that the output value will be a 16-bit signed integer - n = 2**15 - low = -n + 1 - high = n - 2 - - # binary search - while low <= high: - mid = low + (high - low) // 2 - input_data = (scaling_factor * mid) * 2.0 / 3.0 # TODO: get from quantizers (n_bits, max_value) - data_dict = batch_norm_node.run(data=input_data) - data_dict = quantizer_conv_output_node.run(data=data_dict['data']) - result = data_dict['data'][idx] - computed_quantized_results[idx].add(result) - - if result > value: - high = mid - 1 - else: - low = mid + 1 - - ths[idx].append(low) - - # TODO: Neil-san, you don't probably need this - # check if increasing, decreasing or constant - for channel, values in computed_quantized_results.items(): - if len(values) == 1: - ths[channel].append(values.pop() + magic_number) - else: - first_threshold_result = values.pop() - second_threshold_result = values.pop() - - if first_threshold_result < second_threshold_result: - ths[channel].append(1) + # assume that the output value will be a 16-bit signed integer + n = 2 ** 2 - 1 + ch = conv_node.channel + max_th_value = 2 ** 15 + + # The threshold_table is ndarray that holds the threshold values for all channels + 
threshold_table = np.empty([ch, n + 1], dtype=np.int32) + + # Compute threshold (t0, t1, t2) + for th_id, th_v in enumerate([0.5, 1.5, 2.5]): + init_threshold = np.full(ch, th_v, dtype=np.float64) + + # run calculation in reverse order: q -> bn -> scaling + # TODO: make sure the order of pattern is always valid + trans_th = {'data': init_threshold} + for op in p[:-1]: + trans_th = op.de_run(**trans_th) + threshold = (trans_th['data'] * np.float64(n)) / (np.float64(2.0) * scaling_factor) + + for ch_id, th_per_ch in enumerate(threshold): + if quantizer_conv_weights.op_type == 'QTZ_binary_channel_wise_mean_scaling': + threshold_table[ch_id, th_id] = int(math.floor(th_per_ch)) \ + if (scaling_factor[ch_id] < 0) ^ (ch_id in trans_th['nega_idx']) \ + else int(math.ceil(th_per_ch)) else: - ths[channel].append(-1) - - # TODO: Neil-san, you keep the things in a list already - # put everything into a list to be compatible with the rest of the code - ths_list = [] - for channel in sorted(ths.keys()): - ths_list += ths[channel] - conv_node.thresholds = ths_list + threshold_table[ch_id, th_id] = int(math.floor(th_per_ch)) \ + if (scaling_factor < 0) ^ (ch_id in trans_th['nega_idx']) \ + else int(math.ceil(th_per_ch)) + + # take care of threshold values that are larger than 16-bit signed integer + if abs(threshold_table[ch_id, th_id]) > max_th_value: + raise ValueError(f'the threshold value {th_id} is larger than 16-bit signed integer') + + for c in range(ch): + threshold_table[c, -1] = 1 \ + if np.all(threshold_table[c, 1:-1] > threshold_table[c, :-2], axis=0) else -1 + + # Put the thresholds into list + conv_node.thresholds = threshold_table.flatten().tolist() # TODO: Neil-san, you should keep this # Disconnect batchnorm and the quantizer From 299fe786a1f9d25a70bd625e06ea2335d12d21ef Mon Sep 17 00:00:00 2001 From: nlpng Date: Mon, 10 Dec 2018 18:38:25 +0900 Subject: [PATCH 08/45] Temperately fix for super large threshold value over 16bit int --- dlk/python/dlk/scripts/generate_project.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index e9f05d378..8643ddf1e 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -280,7 +280,7 @@ def pass_compute_thresholds(graph): # assume that the output value will be a 16-bit signed integer n = 2 ** 2 - 1 ch = conv_node.channel - max_th_value = 2 ** 15 + max_th_value = 2 ** 15 - 1 # The threshold_table is ndarray that holds the threshold values for all channels threshold_table = np.empty([ch, n + 1], dtype=np.int32) @@ -306,13 +306,14 @@ def pass_compute_thresholds(graph): if (scaling_factor < 0) ^ (ch_id in trans_th['nega_idx']) \ else int(math.ceil(th_per_ch)) - # take care of threshold values that are larger than 16-bit signed integer - if abs(threshold_table[ch_id, th_id]) > max_th_value: - raise ValueError(f'the threshold value {th_id} is larger than 16-bit signed integer') + # take care of threshold values that are larger than 16-bit signed integer + threshold_table[abs(threshold_table) > max_th_value] = max_th_value for c in range(ch): threshold_table[c, -1] = 1 \ if np.all(threshold_table[c, 1:-1] > threshold_table[c, :-2], axis=0) else -1 + if np.all(threshold_table[c, 1:-1] == threshold_table[c, :-2], axis=0): + threshold_table[c, -1] = 2 # Put the thresholds into list conv_node.thresholds = threshold_table.flatten().tolist() From 7052906f3f2e2edb5fe808d1108f0add228f5121 Mon Sep 17 
00:00:00 2001 From: nlpng Date: Tue, 11 Dec 2018 10:59:52 +0900 Subject: [PATCH 09/45] Applying conditions for hq and ts --- dlk/python/dlk/scripts/generate_project.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index 8643ddf1e..09b7f3e0f 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -312,6 +312,7 @@ def pass_compute_thresholds(graph): for c in range(ch): threshold_table[c, -1] = 1 \ if np.all(threshold_table[c, 1:-1] > threshold_table[c, :-2], axis=0) else -1 + # Applying the magic number if np.all(threshold_table[c, 1:-1] == threshold_table[c, :-2], axis=0): threshold_table[c, -1] = 2 @@ -452,12 +453,15 @@ def optimize_graph_step(model: Model, config: Config) -> None: pass_print(graph, 'After transpose') pass_dot_graph(graph, '/tmp/transposed.dot') - pass_propagate_quantization_details_into_conv(graph) - pass_print(graph, 'After propagate') - - pass_compute_thresholds(graph) - pass_pack_weights(graph) - pass_quantize_convolutions(graph) + if config.activate_hard_quantization: + pass_propagate_quantization_details_into_conv(graph) + pass_print(graph, 'After propagate') + + if config.threshold_skipping: + pass_compute_thresholds(graph) + pass_pack_weights(graph) + pass_quantize_convolutions(graph) + pass_propagate_datatypes(graph) processed_nodes = [] From a669ca592f47a21bf6b6f04bbfc7bd5ed2640a39 Mon Sep 17 00:00:00 2001 From: nlpng Date: Tue, 11 Dec 2018 13:19:59 +0900 Subject: [PATCH 10/45] Move the axis input of split operator to attribute --- dlk/python/dlk/core/view.py | 2 +- dlk/python/dlk/plugins/tf.py | 3 +++ dlk/python/dlk/scripts/generate_project.py | 7 +++---- dlk/python/dlk/templates/include/func/split.h | 2 +- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/dlk/python/dlk/core/view.py b/dlk/python/dlk/core/view.py index d711576a0..668725de9 100644 --- a/dlk/python/dlk/core/view.py +++ b/dlk/python/dlk/core/view.py @@ -706,7 +706,7 @@ def run(self): """ ) elif self.op.op_type == 'Split': - if len(input_ops) != 2: + if len(input_ops) != 1: self.raise_invalid_args_exception(op, input_ops, output_ops) inputs_string = self.inputs_to_string(input_ops) diff --git a/dlk/python/dlk/plugins/tf.py b/dlk/python/dlk/plugins/tf.py index 584fc1e6f..c652698ce 100644 --- a/dlk/python/dlk/plugins/tf.py +++ b/dlk/python/dlk/plugins/tf.py @@ -1027,6 +1027,9 @@ def infer_dtype() -> DataType: dimension_format=current_format, split=num_split ) + input_axis_name = input_ops_order[0] + nodes_to_remove.append(new_op.input_ops[input_axis_name]) + new_op.remove_input(input_axis_name) else: raise UnsupportedNode( f'TensorFlow importer cannot convert {op_type} operator node!') diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index 09b7f3e0f..328c00653 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -277,10 +277,10 @@ def pass_compute_thresholds(graph): scaling_factor = quantizer_conv_weights.scaling_factor # TODO: make '3' function on the number of bits of the number of bits - # assume that the output value will be a 16-bit signed integer n = 2 ** 2 - 1 ch = conv_node.channel - max_th_value = 2 ** 15 - 1 + # assume that the threshold values will be a 13-bit signed integer + max_th_value = 2 ** 12 - 1 # The threshold_table is ndarray that holds the threshold values for all channels threshold_table 
= np.empty([ch, n + 1], dtype=np.int32) @@ -290,7 +290,6 @@ def pass_compute_thresholds(graph): init_threshold = np.full(ch, th_v, dtype=np.float64) # run calculation in reverse order: q -> bn -> scaling - # TODO: make sure the order of pattern is always valid trans_th = {'data': init_threshold} for op in p[:-1]: trans_th = op.de_run(**trans_th) @@ -461,7 +460,7 @@ def optimize_graph_step(model: Model, config: Config) -> None: pass_compute_thresholds(graph) pass_pack_weights(graph) pass_quantize_convolutions(graph) - + pass_propagate_datatypes(graph) processed_nodes = [] diff --git a/dlk/python/dlk/templates/include/func/split.h b/dlk/python/dlk/templates/include/func/split.h index 8960025b0..524a88b17 100644 --- a/dlk/python/dlk/templates/include/func/split.h +++ b/dlk/python/dlk/templates/include/func/split.h @@ -20,7 +20,7 @@ limitations under the License. #include "time_measurement.h" template -void func_Split(int32_t axis, T input[], T *outputs[], T_UINT num_split, T_UINT out_height, T_UINT out_width, T_UINT out_depth) +void func_Split(T input[], T *outputs[], T_UINT num_split, T_UINT out_height, T_UINT out_width, T_UINT out_depth) { Measurement::Start("func_SpliT"); From 15b91353f9b8fa8507d94709bb43fbc7a9af910f Mon Sep 17 00:00:00 2001 From: Antonio Date: Tue, 11 Dec 2018 18:45:21 +0900 Subject: [PATCH 11/45] Propagate output data type to last quantized convolution --- dlk/python/dlk/scripts/generate_project.py | 47 +++++++++++----------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index 328c00653..6810229d9 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -45,23 +45,6 @@ ROOT_DIR = path.abspath(path.join(SCRITPS_DIR, '../../..')) -def pass_print(graph: Graph, name=str()): - - gm = GraphMatcher(graph) - - print('--- ', name, '---') - matches = list() - p = Pattern("*") - gm.get_op_type_matches(p, matches) - - for m in matches: - print('Match: ', m.node.name, m.node.op_type, m.node.dimension) - for input_node in m.node.input_nodes: - print(' -> ', input_node.name, input_node.op_type) - - print('---') - - def pass_dot_graph(graph: Graph, filename): dot_script = 'digraph {' @@ -427,6 +410,28 @@ def pass_propagate_datatypes(graph): m.node.dtype = m.node.input_nodes[0].dtype +def pass_propagate_output_type_backward(graph): + + gm = GraphMatcher(graph) + + matches = list() + p = Pattern('*') + + gm.get_op_type_matches(p, matches) + + def find_input(node, otype): + for n in node.input_nodes: + if n.op_type == 'Conv' and n.is_quantized: + n.dtype = otype + return + find_input(n, otype) + + output_node = matches[-1].node + + output_type = output_node.dtype + find_input(output_node, output_type) + + def optimize_graph_step(model: Model, config: Config) -> None: """Optimze graph in the model. 
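[Editorial note, not part of the patch series] PATCH 07-10 above replace the earlier per-channel binary search with a closed-form threshold computation: each output boundary of the activation quantizer (0.5, 1.5, 2.5 for a 2-bit output) is pushed backwards through QTZ_linear_mid_tread_half.de_run and BatchNormalization.de_run, then rescaled by the weight quantizer's scaling factor so that it lands in the integer accumulator space of the convolution. The sketch below mirrors that order of operations for a single channel with made-up parameter values; the names scale, beta, mean, var and scaling_factor stand in for the corresponding graph inputs, and the real pass additionally flips ceil/floor for channels with a negative batch-norm scale (nega_idx) and clamps values that exceed the 13-bit signed range.

import numpy as np

# hypothetical per-channel parameters; the real values come from the graph
scale, beta, mean, var, eps = 1.2, 0.3, 0.05, 0.8, 1e-5   # BatchNormalization inputs
nbit, max_v = 2, 2.0                                       # activation quantizer settings
scaling_factor = 0.7                                       # from the weight quantizer
n = 2 ** nbit - 1                                          # number of thresholds per channel

for boundary in [0.5, 1.5, 2.5]:                           # midpoints between the output levels 0..3
    x = (boundary * max_v) / n                              # QTZ_linear_mid_tread_half.de_run
    x = ((x - beta) / scale) * np.sqrt(var + eps) + mean    # BatchNormalization.de_run
    th = (x * n) / (max_v * scaling_factor)                 # rescale into conv-accumulator space
    th = np.ceil(th) if scaling_factor >= 0 else np.floor(th)
    print(f'boundary {boundary} -> threshold {int(th)}')
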
@@ -441,23 +446,19 @@ def optimize_graph_step(model: Model, config: Config) -> None: """ graph: Graph = model.graph - pass_print(graph, 'Before') pass_dot_graph(graph, '/tmp/original.dot') pass_remove_identities(graph) - pass_print(graph, 'After identity') pass_dot_graph(graph, '/tmp/prune_identities.dot') pass_transpose(graph) - pass_print(graph, 'After transpose') pass_dot_graph(graph, '/tmp/transposed.dot') if config.activate_hard_quantization: pass_propagate_quantization_details_into_conv(graph) - pass_print(graph, 'After propagate') - if config.threshold_skipping: pass_compute_thresholds(graph) + pass_propagate_output_type_backward(graph) pass_pack_weights(graph) pass_quantize_convolutions(graph) @@ -466,8 +467,6 @@ def optimize_graph_step(model: Model, config: Config) -> None: processed_nodes = [] while pass_precompute(graph, processed_nodes=processed_nodes): pass - pass_print(graph, 'After precompute') - pass_dot_graph(graph, '/tmp/final.dot') optim = Optimizer() From 6301b579df07121db3476d84e6cf3052aeed24fe Mon Sep 17 00:00:00 2001 From: nlpng Date: Wed, 12 Dec 2018 08:34:57 +0900 Subject: [PATCH 12/45] Type change has no effect, let's move it around... --- dlk/python/dlk/scripts/generate_project.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index 6810229d9..ef72eaefa 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -458,10 +458,12 @@ def optimize_graph_step(model: Model, config: Config) -> None: pass_propagate_quantization_details_into_conv(graph) if config.threshold_skipping: pass_compute_thresholds(graph) - pass_propagate_output_type_backward(graph) + # pass_propagate_output_type_backward(graph) pass_pack_weights(graph) pass_quantize_convolutions(graph) + if config.threshold_skipping: + pass_propagate_output_type_backward(graph) pass_propagate_datatypes(graph) processed_nodes = [] From 3a9aecf3bb81417eaff0a856e5753d48cf7bb63c Mon Sep 17 00:00:00 2001 From: nlpng Date: Wed, 12 Dec 2018 09:40:50 +0900 Subject: [PATCH 13/45] Fix PEP8s --- dlk/python/dlk/core/operators.py | 2 -- dlk/python/dlk/scripts/generate_project.py | 9 ++------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/dlk/python/dlk/core/operators.py b/dlk/python/dlk/core/operators.py index 082ea8a16..a8b234ac8 100644 --- a/dlk/python/dlk/core/operators.py +++ b/dlk/python/dlk/core/operators.py @@ -775,8 +775,6 @@ def binarizer(self, data: np.ndarray) -> np.ndarray: f'operator {self.op_type} need to implement the binarizer method') - - class QTZ_binary_mean_scaling(Quantizer): """Quantization operator using binary scaling. 
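[Editorial note, not part of the patch series] The reordering in PATCH 12 above is what makes pass_propagate_output_type_backward take effect. That pass only retypes convolutions whose is_quantized flag is already True, and the flag is set by pass_quantize_convolutions, so calling the backward propagation before the quantization pass (as PATCH 11 did) finds no quantized Conv and changes nothing. Running it after pass_quantize_convolutions, and before pass_propagate_datatypes, lets the network's output dtype reach the last quantized convolution.
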
diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index ef72eaefa..410f45f24 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -55,12 +55,6 @@ def pass_dot_graph(graph: Graph, filename): code[node.name] = counter counter += 1 - # for node in graph.operators: - # for input_node in node.input_nodes: - # - # dot_script += '"' + format(code[input_node.name], '04X') + '-' + input_node.op_type + '"' + ' -> ' \ - # + '"' + format(code[node.name], '04X') + '-' + node.op_type + '"' + ';' - for node in graph.operators: shape = '-' @@ -68,7 +62,8 @@ def pass_dot_graph(graph: Graph, filename): shape = 'x'.join(str(x) for x in node.shape) shape += '(' + node.dimension + ')' - dot_script += node.name + '[label=" ' + format(code[node.name], '04X') + '| ' + node.op_type + '| ' + shape + '| ' + node.dtype.cpptype() + '" shape = "record"];' + dot_script += node.name + '[label=" ' + format(code[node.name], '04X') + '| ' + \ + node.op_type + '| ' + shape + '| ' + node.dtype.cpptype() + '" shape = "record"];' for i in node.input_nodes: dot_script += i.name + ' -> ' + node.name + ';' From e6532815ee66203c9354fed0e57855310adb5b94 Mon Sep 17 00:00:00 2001 From: nlpng Date: Wed, 12 Dec 2018 10:07:24 +0900 Subject: [PATCH 14/45] Fix PEP8 again --- dlk/python/dlk/scripts/generate_project.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index 410f45f24..3b5a10709 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -63,7 +63,7 @@ def pass_dot_graph(graph: Graph, filename): shape += '(' + node.dimension + ')' dot_script += node.name + '[label=" ' + format(code[node.name], '04X') + '| ' + \ - node.op_type + '| ' + shape + '| ' + node.dtype.cpptype() + '" shape = "record"];' + node.op_type + '| ' + shape + '| ' + node.dtype.cpptype() + '" shape = "record"];' for i in node.input_nodes: dot_script += i.name + ' -> ' + node.name + ';' @@ -229,8 +229,6 @@ def pass_compute_thresholds(graph): for m in matches: - # TODO: Neil-san, please use this to apply your threshold. 'p' is the path from qtz to conv (both included) - # TODO: Neil-san, you can access to the quantizers thorugh 'conv.a_quantizer' and 'conv.quantizer' p = [m.node] while p[-1].op_type != 'Conv': non_variable_input = [inode for inode in p[-1].input_nodes @@ -245,7 +243,6 @@ def pass_compute_thresholds(graph): quantizer_conv_output_node = p[0] conv_node = p[-1] - # TODO: Neil-san, you should keep this # check if this is a quantized convolution if not conv_node.quantizer or not conv_node.a_quantizer: continue From eaa98571c049c87f33db6314829e2bd177f46103 Mon Sep 17 00:00:00 2001 From: nlpng Date: Wed, 12 Dec 2018 18:14:45 +0900 Subject: [PATCH 15/45] Refactoring optimizer with new passes --- dlk/python/dlk/core/optimizer.py | 857 ++++++++------------- dlk/python/dlk/scripts/generate_project.py | 408 +--------- 2 files changed, 317 insertions(+), 948 deletions(-) diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index 27e03939a..c7a869cf5 100644 --- a/dlk/python/dlk/core/optimizer.py +++ b/dlk/python/dlk/core/optimizer.py @@ -14,632 +14,401 @@ # limitations under the License. 
# ============================================================================= """Module of optimization passes.""" +import math import numpy as np -from core.data_types import DataType, Float32, Float64, Int8, Int16, Int32, Int64, Uint8, Uint16, Uint32, Uint64, \ - Bool, String, QUANTIZED_NOT_PACKED -from core.graph import Graph, GraphRunner -from core.operators import Add, AveragePool, BatchNormalization, Constant, Conv, Identity, Input, \ - MaxPool, Operator, Output, Transpose, Quantizer, QTZ_binary_mean_scaling, QTZ_linear_mid_tread_half, \ - Reshape, Softmax, Relu, Flatten, Dropout, Gemm, SpaceToDepth, QTZ_binary_channel_wise_mean_scaling, ConcatOnDepth,\ - Maximum, DepthToSpace, Split, Variable from typing import Any, Dict, List, Optional, Set, cast -from functools import reduce -from enum import Enum +from core.graph import Graph +from core.graph_pattern_matching import GraphMatcher, Pattern, match_to_execution_list, NodeMatch +from core.operators import Constant, Operator +from core.data_types import Uint32, QUANTIZED_NOT_PACKED +from typing import cast +from collections import defaultdict from modules.packer import Packer -NodeGroup = List[Operator] +def pass_dot_graph(graph: Graph, filename) -> None: -def node_is_add(node: Operator) -> bool: - return node.op_type == 'Add' + dot_script = 'digraph {' + code = {} + counter = 0 + for node in graph.operators: + code[node.name] = counter + counter += 1 -def node_is_conv(node: Operator) -> bool: - return node.op_type == 'Conv' + for node in graph.operators: + shape = '-' + if node.shape: + shape = 'x'.join(str(x) for x in node.shape) + shape += '(' + node.dimension + ')' -def node_is_concat(node: Operator) -> bool: - return node.op_type == 'ConcatV2' + dot_script += node.name + '[label=" ' + format(code[node.name], '04X') + '| ' + \ + node.op_type + '| ' + shape + '| ' + node.dtype.cpptype() + '" shape = "record"];' + for i in node.input_nodes: + dot_script += i.name + ' -> ' + node.name + ';' + dot_script += '}' -def node_is_const(node: Operator) -> bool: - return node.op_type == 'Constant' + with open(filename, 'w') as f: + f.write(dot_script) -def node_is_qconv(node: Operator) -> bool: - return node.op_type == 'Conv' and cast(Conv, node).is_quantized +def pass_remove_identities(graph: Graph) -> None: + gm = GraphMatcher(graph) -def node_is_input(node: Operator) -> bool: - return node.op_type == 'Input' + to_be_removed = list() + matches: List[NodeMatch] = list() + p = Pattern("Identity") + gm.get_op_type_matches(p, matches) + for m in matches: + """skip all identity.""" + in_op = m.node.input_ops['input'] + out_ops = m.node.output_ops['output'] + for out_op in out_ops: + for k, v in out_op.input_ops.items(): + if v == m.node: + # change the output's input to this identity's input + out_op.add_input(k, in_op) + # change the input's output to this identity's output + for k2, v2 in in_op.output_ops.items(): + if m.node in v2: + v2.remove(m.node) + v2.append(out_op) + break + break -def node_is_weight_quantizer(node: Operator) -> bool: - return (node.op_type == 'QTZ_binary_mean_scaling' - or node.op_type == 'QTZ_binary_channel_wise_mean_scaling') + to_be_removed.append(m.node) + for op in to_be_removed: + graph.remove_op(op) -def node_is_activation_quantizer(node: Operator) -> bool: - return node.op_type == 'QTZ_linear_mid_tread_half' +def pass_transpose(graph: Graph) -> None: -class NHWC_Transposer(GraphRunner): - """Transposer of all nodes to NHWC.""" + gm = GraphMatcher(graph) - def _get_permutation(self, dim: str) -> List[int]: - 
"""Create a permutation from the source dimension.""" - assert len(dim) == 4 and set(dim).issubset({'N', 'H', 'W', 'C', 'I', 'O'}), \ - f'illegal dimension found: {dim}' + matches: List[NodeMatch] = list() + p = Pattern("*") + gm.get_op_type_matches(p, matches) - if set(dim) == set('HWIO'): - dim = dim.replace('I', 'C') - dim = dim.replace('O', 'N') + for m in matches: + dim = m.node.dimension + shape = m.node.shape + if len(shape) != 4 or len(dim) != 4 or not set(dim).issubset({'N', 'H', 'W', 'C', 'I', 'O'}): + continue - return list(map(lambda s: dim.index(s), 'NHWC')) + dim = dim.replace('I', 'C') + dim = dim.replace('O', 'N') - def _check_and_transpose(self, node: Operator) -> None: - perm = self._get_permutation(node.dimension) - node.transpose(perm) + permutation = list(map(lambda s: dim.index(s), 'NHWC')) + m.node.transpose(permutation) - def run_backward_input(self, node: Input, **kwargs: Any) -> None: - self._check_and_transpose(node) - def run_backward_constant(self, node: Constant, **kwargs: Any) -> None: - if node.ndims == 4 and set(node.dimension).issubset({'N', 'H', 'W', 'C', 'I', 'O'}): - self._check_and_transpose(node) +def pass_precompute(graph: Graph, processed_nodes) -> bool: - def run_backward_identity(self, node: Identity, **kwargs: Any) -> None: - if node.ndims == 4 and set(node.dimension).issubset({'N', 'H', 'W', 'C', 'I', 'O'}): - self._check_and_transpose(node) + gm = GraphMatcher(graph) - def run_backward_QTZ_binary_mean_scaling(self, node: QTZ_binary_mean_scaling, **kwargs: Any) -> None: - self._check_and_transpose(node) + matches: List[NodeMatch] = list() + p = Pattern('*') + gm.get_op_type_matches(p, matches) - def run_backward_transpose(self, node: Transpose, **kwargs: Any) -> None: - raise NotImplementedError('Transposing Transpose operator is not supported yet.') + processed_before_precompute = len(processed_nodes) - def run_backward_conv(self, node: Conv, **kwargs: Any) -> None: - self._check_and_transpose(node) + for m in matches: + if m.node in processed_nodes: + continue - def run_backward_batch_normalization(self, node: BatchNormalization, **kwargs: Any) -> None: - self._check_and_transpose(node) + # We want operators with inputs + if not m.node.input_nodes: + continue - def run_backward_QTZ_linear_mid_tread_half(self, node: QTZ_linear_mid_tread_half, **kwargs: Any) -> None: - self._check_and_transpose(node) + precomputable = True + for input_node in m.node.input_nodes: + if input_node.op_type != 'Constant': + precomputable = False - def run_backward_max_pool(self, node: MaxPool, **kwargs: Any) -> None: - self._check_and_transpose(node) + if not precomputable: + continue - def run_backward_average_pool(self, node: AveragePool, **kwargs: Any) -> None: - self._check_and_transpose(node) + processed_nodes += m.node.input_nodes + processed_nodes.append(m.node) - def run_backward_SpaceToDepth(self, node: SpaceToDepth, **kwargs: Any) -> None: - self._check_and_transpose(node) + data = m.node.run_forward() - def run_backward_QTZ_binary_channel_wise_mean_scaling( - self, - node: QTZ_binary_channel_wise_mean_scaling, - **kwargs: Any) -> None: - self._check_and_transpose(node) + new_constant = Constant( + m.node.name + '_new', + m.node.dtype, + data, + dimension_format=m.node.dimension + ) - def run_backward_ConcatOnDepth(self, node: ConcatOnDepth, **kwargs: Any) -> None: - self._check_and_transpose(node) + graph.add_op(new_constant) - def run_backward_Maximum(self, node: Maximum, **kwargs: Any) -> None: - self._check_and_transpose(node) + 
new_constant.add_outputs(m.node.output_ops) + for output_name, consumer_list in m.node.output_ops.items(): + for consumer_node in consumer_list: + for input_name, input_node in consumer_node.input_ops.items(): + if input_node == m.node: + consumer_node.add_input(input_name, new_constant) + break + return len(processed_nodes) > processed_before_precompute - def run_backward_DepthToSpace(self, node: DepthToSpace, **kwargs: Any) -> None: - self._check_and_transpose(node) +def pass_propagate_quantization_details_into_conv(graph: Graph) -> None: -class PreComputeRunner(GraphRunner): - """Optimization class that does precomputation and pruning on the graph. + gm = GraphMatcher(graph) - Fron a constant node, this object precomputes as far as possible, and - replaces all precomputed nodes with a newly defined constant node. + matches: List[NodeMatch] = list() + p = Pattern('*') + gm.get_op_type_matches(p, matches) - Additionally, in the hard-quantized mode, this object replaces a - weight-quantizer node and succesive Conv node with a QConv node, and - packs the weight. - """ + qtypes = [ + 'QTZ_binary_mean_scaling', + 'QTZ_linear_mid_tread_half', + 'QTZ_binary_channel_wise_mean_scaling' + ] - _quantized_bitwidth = 1 - _wordsize = 32 + quant_details = defaultdict(list) + for m in matches: + if not m.node.preserve_quantization: + quant_details[m.node.name] = [] + continue - def __init__(self, graph: Graph, hard_quantized: bool = False) -> None: - """Set up internal varibles.""" - self._precomp_dic: Dict[str, bool] = {} - self._nodes_removed: Set[Operator] = set() - self._hard_quantized = hard_quantized - self._quantizers: Dict[str, Quantizer] = {} # the operator name and its quantizer - self._connected_convs: Dict[Operator, List[Conv]] = {} # node name and its connected convolver + if m.node.op_type == 'Conv': + input_node = m.node.input_nodes[0] + weight_node = m.node.input_nodes[1] - super().__init__(graph) + m.node.a_quantizer = [input_node] if input_node.op_type in qtypes else quant_details[input_node.name] + m.node.quantizer = weight_node if weight_node.op_type in qtypes else quant_details[weight_node.name] - def initialize(self, **kwargs: Any) -> None: - qconvs: List[Conv] = kwargs['qconv'] - self._connected_convs = {q: [q] for q in qconvs} + quant_details[m.node.name] = [] + else: + qtzs = [] + for n in m.node.input_nodes: + if n.op_type in qtypes: + qtzs.append(n) + else: + for q in quant_details[n.name]: + qtzs.append(q) + + quant_details[m.node.name] = qtzs if len(qtzs) == len(m.node.input_nodes) else [] + # TODO: check if the quantizers use same n_bits + + +def pass_compute_thresholds(graph: Graph) -> None: + + gm = GraphMatcher(graph) + + matches: List[NodeMatch] = list() + p = Pattern('QTZ_linear_mid_tread_half') + + gm.get_op_type_matches(p, matches) + + for m in matches: + + p = [m.node] + while p[-1].op_type != 'Conv': + non_variable_input = [inode for inode in p[-1].input_nodes + if (not cast(Operator, inode).is_variable and inode.is_monotonic) + or inode.op_type == 'Conv'] + if len(non_variable_input) != 1: + break + p.append(non_variable_input[-1]) + + if p[-1].op_type != 'Conv': + continue + quantizer_conv_output_node = p[0] + conv_node = p[-1] + + # check if this is a quantized convolution + if not conv_node.quantizer or not conv_node.a_quantizer: + continue + + quantizer_conv_weights = conv_node.quantizer + quantizer_conv_weights.run_forward_no_scaling_factor() + scaling_factor = quantizer_conv_weights.scaling_factor + + # Getting the bit and max value + nbits = [] + max_vs = 
[] + for aqtz in conv_node.a_quantizer: + nbits.append(aqtz.nbit) + max_vs.append(aqtz.max_v) + if not (len(set(nbits)) == 1) and not (len(set(max_vs)) == 1): + raise ValueError(f'bits {nbits} or max values {max_vs} are not consistent') + else: + nbit = nbits[0] + max_v = max_vs[0] + + n = 2 ** nbit - 1 + ch = conv_node.channel + # assume that the threshold values will be a 13-bit signed integer + max_th_value = 2 ** 12 - 1 + + # The threshold_table is numpy array that holds the threshold values for all channels + threshold_table = np.empty([ch, n + 1], dtype=np.int32) - def finalize(self, **kwargs: Any) -> None: - """Remove all unused nodes from the graph.""" - for n in self._nodes_removed: - self._graph.remove_op(n) + # Compute threshold (t0, t1, t2) + th_val = [0.5 + i for i in range(n)] + for th_id, th_v in enumerate(th_val): + init_threshold = np.full(ch, th_v, dtype=np.float64) + + # run calculation in reverse order: q -> bn -> scaling + trans_th = {'data': init_threshold} + for op in p[:-1]: + trans_th = op.de_run(**trans_th) + threshold = (trans_th['data'] * np.float64(n)) / (np.float64(max_v) * scaling_factor) - # 1st phase: check which conv the node connects + for ch_id, th_per_ch in enumerate(threshold): + if quantizer_conv_weights.op_type == 'QTZ_binary_channel_wise_mean_scaling': + threshold_table[ch_id, th_id] = int(math.floor(th_per_ch)) \ + if (scaling_factor[ch_id] < 0) ^ (ch_id in trans_th['nega_idx']) \ + else int(math.ceil(th_per_ch)) + else: + threshold_table[ch_id, th_id] = int(math.floor(th_per_ch)) \ + if (scaling_factor < 0) ^ (ch_id in trans_th['nega_idx']) \ + else int(math.ceil(th_per_ch)) - def run_backward_by_default(self, node: Operator, **kwargs: Any) -> None: - outputs = node.output_op_list + # take care of threshold values that are larger than 16-bit signed integer + threshold_table[abs(threshold_table) > max_th_value] = max_th_value - convs: List[Conv] = sum([self._connected_convs[out] for out in outputs if self._connected_convs.get(out)], []) - self._connected_convs[node] = convs + for c in range(ch): + threshold_table[c, -1] = 1 \ + if np.all(threshold_table[c, 1:-1] > threshold_table[c, :-2], axis=0) else -1 + # Applying the magic number + if np.all(threshold_table[c, 1:-1] == threshold_table[c, :-2], axis=0): + threshold_table[c, -1] = 2 - def run_backward_conv(self, node: Conv, **kwargs: Any) -> None: - pass # do nothing, as all (quantized) conv node is already registered to self._connected_convs + # Put the thresholds into list + conv_node.thresholds = threshold_table.flatten().tolist() - # 2nd phase: precompute and prune + # Disconnect batchnorm and the quantizer + out_ops = quantizer_conv_output_node.output_ops['output'] + for output_node in out_ops: + for input_name, input_node in output_node.input_ops.items(): + if input_node == quantizer_conv_output_node: + output_node.add_input(input_name, conv_node) - def _has_precompute_value(self, op: Operator) -> bool: - """Return True if the operator has precompute value.""" - return self._precomp_dic[op.name] + conv_node.remove_output('Y') + conv_node.add_outputs({'Y': out_ops}) - def _is_prunable(self, op: Operator) -> bool: - """Return True if op can be prunable.""" - return self._has_precompute_value(op) and op.op_type != 'Constant' - def _prune(self, node: Operator) -> None: - """Prune the node and its inputs.""" - # prune inputs - for i in node.input_ops.values(): - if i not in self._nodes_removed: - self._prune(i) +def pass_pack_weights(graph: Graph) -> None: - # prune itself - 
self._nodes_removed.add(node) + gm = GraphMatcher(graph) - def _precompute_or_prune_inputs(self, node: Operator) -> None: - """Precompute itself or prune the input nodes. + quantization_types = [ + 'QTZ_binary_mean_scaling', + 'QTZ_linear_mid_tread_half', + 'QTZ_binary_channel_wise_mean_scaling' + ] - If all input has precompute value, then make the node precompute. - Otherwise, all prunable input nodes are pruned and substituted with - a new constant node. - """ - ops: List[Operator] = [node.input_ops[i] for i in node.input_names if node.input_ops.get(i)] - ops_have_precomp_values = list(map(lambda x: self._has_precompute_value(x), ops)) - ops_are_prunable = list(map(lambda x: self._is_prunable(x), ops)) - ops_are_in_quantized = list(map(lambda x: x.name in self._quantizers.keys(), ops)) + matches: List[NodeMatch] = list() + p = Pattern('Conv') - # check which input node can be pruned - if reduce(lambda x, y: x and y, ops_have_precomp_values): # all input has concrete values - node.run_forward() - self._precomp_dic[node.name] = True # this node can be pruned - if reduce(lambda x, y: x or y, ops_are_in_quantized): # some input operator to be quantized exists - quantizers = {op.name: self._quantizers[op.name] for op in ops if self._quantizers.get(op.name)} - if len(quantizers) > 1: - ValueError(f'{node.name}: multiple quantized inputs with {node.op_type} are not supported.') - self._quantizers[node.name] = list(quantizers.values())[0] + gm.get_op_type_matches(p, matches) - else: - self._precomp_dic[node.name] = False + # TODO: pass proper parameters + packer = Packer(1, 32) - # prune input opetarots - for key, op in zip(node.input_names, ops): - if self._is_prunable(op): - # get scaling factor if it is to be quantized but not in hard quantization mode - scaling = 1 if self._quantizers.get(op.name) is None \ - else self._quantizers[op.name].scaling_factor + for m in matches: + conv_node = m.node - extra_dims = tuple(np.ones((len(op.data.shape) - len(scaling.shape)), dtype=np.int32)) - scaling = scaling.reshape(scaling.shape + extra_dims) + # check if this is a quantized convolution + if not conv_node.quantizer or not conv_node.a_quantizer: + continue - # creates new constant - new_op = Constant( - op.name + '_new', - op.dtype, - op.data * scaling, - dimension_format=op.dimension - ) + weight_quantizer = conv_node.quantizer + if weight_quantizer.op_type not in quantization_types: + continue - # replace and prune the old operators - node.add_input(key, new_op) - self._graph.add_op(new_op) - self._prune(op) + # Quantize the weights + weight_quantizer.run_forward() + op_data = weight_quantizer.binarizer(weight_quantizer.data) + data = packer.run(op_data.astype(np.float32), weight_quantizer.dimension) - def run_forward_by_default(self, node: Operator, **kwargs: Any) -> None: - self._precompute_or_prune_inputs(node) + quantized_constant = Constant( + weight_quantizer.name + '_new', + Uint32(), + data, + packed=True, + actual_shape=weight_quantizer.shape + ) - def run_forward_input(self, node: Input, **kwargs: Any) -> None: - self._precomp_dic[node.name] = False + graph.add_op(quantized_constant) - def run_forward_constant(self, node: Constant, **kwargs: Any) -> None: - self._precomp_dic[node.name] = True + quantized_constant.add_outputs(weight_quantizer.output_ops) + for output_name, consumer_list in weight_quantizer.output_ops.items(): + for consumer_node in consumer_list: + for input_name, input_node in consumer_node.input_ops.items(): + if input_node == weight_quantizer: + 
consumer_node.add_input(input_name, quantized_constant) + break - def run_forward_identity(self, node: Identity, **kwargs: Any) -> None: - """skip all identity.""" - in_op = node.input_ops['input'] - out_ops = node.output_ops['output'] - for out_op in out_ops: - for k, v in out_op.input_ops.items(): - if v == node: - # change the output's input to this identity's input - out_op.add_input(k, in_op) - # change the input's output to this identity's output - for k2, v2 in in_op.output_ops.items(): - if node in v2: - v2.remove(node) - v2.append(out_op) - break - break - def run_forward_QTZ_binary_mean_scaling(self, node: QTZ_binary_mean_scaling, **kwargs: Any) -> None: - in_op = node.input_ops['input'] +def pass_quantize_convolutions(graph: Graph) -> None: - # if it can be precomputed - if self._has_precompute_value(in_op): - node.run_forward() - self._precomp_dic[node.name] = True # this node can be pruned - self._quantizers[node.name] = node # add itself as the quantizer - else: - self._precomp_dic[node.name] = False - - def run_forward_conv(self, node: Conv, **kwargs: Any) -> None: - ops: List[Operator] = [node.input_ops[i] for i in node.input_names if node.input_ops.get(i)] - - if self._hard_quantized and node in kwargs['qconv']: - # data is to be packed - ops_have_precomp_values = list(map(lambda x: self._has_precompute_value(x), ops)) - ops_are_prunable = list(map(lambda x: self._is_prunable(x), ops)) - - # check which input node can be pruned - if reduce(lambda x, y: x and y, ops_have_precomp_values): # all input has concrete values - node.run_forward() - self._precomp_dic[node.name] = True # this node can be pruned - quantizers = {op.name: self._quantizers[op.name] for op in ops if self._quantizers.get(op.name)} - if len(quantizers) > 1: - ValueError(f'{node.name}: multiple quantized inputs with {node.op_type} are not supported.') - self._quantizers[node.name] = list(quantizers.values())[0] - - else: # an input (must be weight) is to be quantized and packed - self._precomp_dic[node.name] = False - node.is_quantized = True - packer = Packer(self._quantized_bitwidth, self._wordsize) - quantizers = {op.name: self._quantizers[op.name] for op in ops if self._quantizers.get(op.name)} - if len(quantizers) > 1: - ValueError(f'{node.name}: multiple quantized inputs with {node.op_type} are not supported.') - node.quantizer = list(quantizers.values())[0] - - for key, op in zip(node.input_names, ops): - - if self._is_prunable(op): - shape = op.shape - op_data = node.quantizer.binarizer(op.data) - data = packer.run(op_data.astype(np.float32), op.dimension) - dtype = op.dtype - new_op = Constant( - op.name + '_new', - dtype, - data, - packed=True, - actual_shape=shape - ) - node.add_input(key, new_op) - self._graph.add_op(new_op) - self._prune(op) + gm = GraphMatcher(graph) - else: - self._precompute_or_prune_inputs(node) - - def run_forward_QTZ_binary_channel_wise_mean_scaling( - self, - node: QTZ_binary_channel_wise_mean_scaling, - **kwargs: Any) -> None: - in_op = node.input_ops['input'] - - # if it can be precomputed - if self._has_precompute_value(in_op): - node.run_forward() - self._precomp_dic[node.name] = True # this node can be pruned - self._quantizers[node.name] = node # add itself as the quantizer - else: - self._precomp_dic[node.name] = False - - -class DTypeChanger(GraphRunner): - """Optimization class that changes dypes. - - This runner must run before PrecomputeRunner. 
- """ - - class Path(Enum): - INPUT = 1, - WEIGHT = 2, - OTHER = 3 - - _packed_dtype = {Path.INPUT: QUANTIZED_NOT_PACKED(), Path.WEIGHT: Uint32(), Path.OTHER: Float32()} - _a_quantizers = {'QTZ_linear_mid_tread_half'} - _w_quantizers = {'QTZ_binary_mean_scaling', 'QTZ_binary_channel_wise_mean_scaling'} - _conv = {'Conv'} - - def __init__(self, graph: Graph) -> None: - """Set up internal varibles.""" - self._output_convs: Dict[Operator, List[Conv]] = {} - self._packed_input_path: Dict[str, Any] = {} - - super().__init__(graph, depth_first=False) - - # 1st phase: check nodes which dtype must be changed - - def _check_dtype_state(self, node: Operator) -> None: - """checks the state of each node regarding dtype. - - - whether the node is after conv and before activation quantizer - - whether the node is after activation and before conv - """ - outputs = node.output_op_list - convs: List[Conv] = sum([self._output_convs[out] for out in outputs if self._output_convs.get(out) is not None], - []) - - # determine the path of node is input or weight or others - path = self.Path.WEIGHT - for out in outputs: - p = self._packed_input_path[out.name] if out.op_type not in self._conv \ - else self.Path.INPUT if node == out.input_ops['X'] \ - else self.Path.WEIGHT - if path == self.Path.WEIGHT: - path = p - elif path == p: - pass - else: # output have different paths - ValueError('multiple outputs must have the same kind of paths.') - - is_not_before_a_quantizer = reduce(lambda x, y: x and y, - [out.op_type not in self._a_quantizers for out in outputs]) - if convs and is_not_before_a_quantizer: - self._output_convs[node] = convs - - self._packed_input_path[node.name] = path - - def run_backward_by_default(self, node: Operator, **kwargs: Any) -> None: - self._check_dtype_state(node) - - def run_backward_output(self, node: Output, **kwargs: Any) -> None: - self._packed_input_path[node.name] = self.Path.OTHER - - def run_backward_conv(self, node: Conv, **kwargs: Any) -> None: - self._output_convs[node] = [node] - - # 2nd phase: change data type - - def turn(self, **kwargs: Any) -> None: - """Set up qconv list""" - output_convs: List[Conv] = sum(list(self._output_convs.values()), []) - for conv in output_convs: - # get all ascendants of conv - ascendants = [k for k in self._output_convs.keys() if conv in self._output_convs[k]] - - # whether some weight quantizer is in ascendants - wqtz_in_asc = reduce(lambda x, y: x or y, - list(map(lambda n: n.op_type in self._w_quantizers, ascendants))) - # whether some activation quantizer is in ascendants - aqtz_in_asc = reduce(lambda x, y: x or y, - list(map(lambda n: n.op_type in self._a_quantizers, ascendants))) - # if both, add conv to the list - if wqtz_in_asc and aqtz_in_asc: - kwargs['qconv'].add(conv) - - def _set_dtype(self, node: Operator, qconv: List[Conv]) -> None: - def before_qconv() -> bool: - """Return if the node is before a quantized convolver""" - convs: List[Conv] = self._output_convs[node] if self._output_convs.get(node) else [] - # consistency check - is_qconv: List[bool] = list(map(lambda x: x in qconv, convs)) - all_is_qconv = reduce(lambda x, y: x and y, is_qconv, True) - some_is_qconv = reduce(lambda x, y: x or y, is_qconv, False) - assert convs == [] or (all_is_qconv == some_is_qconv), \ - f'{node.name} connects to both of a quantized convolver and non-quantized one.' 
- - return convs != [] and all_is_qconv - - def get_dtype() -> Optional[DataType]: - """Return dtype along with which path the node is on: 'input' or 'weight' of a conv""" - path = self._packed_input_path.get(node.name) - return self._packed_dtype[path] if path is not None else None - - dtype = get_dtype() - conv = self._output_convs.get(node) - if dtype is not None and before_qconv(): - node.dtype = dtype - - def run_forward_by_default(self, node: Operator, **kwargs: Any) -> None: - self._set_dtype(node, kwargs['qconv']) - - -class ApplyThresholdSkipping(GraphRunner): - """Optimization class that perform threshold skipping. - - This runner perform threshold skipping with BFS for DLK graph. - Run graphrunner backward to acquire graph info, and run forward - to compute the thresholds skip batchnorm and activation quantizer - with thresholding function. - """ - - def __init__(self, graph: Graph) -> None: - self._aqtz_aqtz: Dict[Operator, List[Operator]] = {} - self._qconv_qconv: Dict[Conv, List] = {} - super().__init__(graph, depth_first=False) - - def _apply_threshold_skipping(self, op_lst: List[Operator]) -> None: - """Performs Conv thresholds computation and skipping.""" - - transitions: Dict[int, Operator] = {} - start, finish = [None, None] - for idx, op in enumerate(op_lst): - if node_is_qconv(op): - start = cast(Conv, op) - elif node_is_activation_quantizer(op): - finish = op - transitions[idx] = op - else: - transitions[idx] = op - - if start is not None and finish is not None: - - # def linear_qtz2float(x: np.ndarray, n_value: int, max_value: float) -> np.ndarray: - # real_x = x / np.float64(n_value) * np.float64(max_value) - # return real_x.astype(np.float64) - - # Step 1: Compute thresholds for Convolution operators - # aqtzer = cast(Quantizer, start.a_quantizer[0]) # Activation Quantizers should all have the same bits - # bit = aqtzer.nbit - # max_v = aqtzer.max_v - # if bit is None or max_v is None: - # ValueError(f'activation quantizer of node {start.name} has bit or max value of None') - - # n = 2 ** bit - 1 - # ch = start.channel - # lch = start.input_ops['X'].channel - # k = start.kernel_height * start.kernel_width * lch * n - # qtzer = cast(Quantizer, start.quantizer) - # conv_results = [x for x in range(-k, k + 1, 1)] - # th_tmp = np.empty([ch, n + 1], dtype=np.int32) - # v_now = dict.fromkeys([x for x in range(ch)], 0) - # th_now = 0 - # val_neg_flag = -1 - # val_pos_flag = 1 - # all_transdata: Dict[int, Dict[str, Any]] = {} - - # Step 1-1: initalize thresholds - # for conv_res in conv_results: - # conv_out = np.full(ch, conv_res, dtype=np.float64) - # conv_out *= qtzer.scaling_factor if qtzer.scaling_factor is not None \ - # else ValueError(f'oops Quantizer of node {start.name} has scaling factor of None') - # - # conv_data = linear_qtz2float(conv_out, n, max_v) - # - # trans_data: Dict[str, Any] = {'data': conv_data} - # for idx, op in sorted(transitions.items(), reverse=True): - # trans_data = op.run(**trans_data) - # - # for depth in range(ch): - # init = -k if depth in trans_data['nega_idx'] else k - # th_tmp[depth, :] = init - # - # all_transdata[conv_res] = trans_data - - # Step 1-2: update thresholds - # for conv_res in conv_results: - # trans_data = all_transdata[conv_res] - # qtz_out = trans_data['data'] - # qtz_mu = np.mean(qtz_out) - # if qtz_mu != th_now: - # for depth in range(ch): - # is_negative = depth in trans_data['nega_idx'] - # if v_now.get(depth) != qtz_out[depth]: - # if is_negative: - # th_tmp[depth, abs(n - qtz_out[depth] - 1)] = conv_res - # 
else: - # th_tmp[depth, qtz_out[depth] - 1] = conv_res - # v_now[depth] = qtz_out[depth] - # th_tmp[depth, n] = -1 if is_negative else 1 - # for depth in range(ch): - # constant = reduce(lambda x, y: x and y, - # [th_tmp[depth, i] == th_tmp[depth, i + 1] for i in range(n - 1)]) - # th_tmp[depth, n] = qtz_out[depth] + 2 if constant else th_tmp[depth, n] - # # note: 2 above is a magic number. the result value must not be 1 nor -1. - # th_now = qtz_mu - - # start.thresholds = th_tmp.flatten().tolist() - - # Step 2: Skipping unused operators, e.g. batch normalization, linear activation quantizer - pass - # if start.has_thresholds: - # if start.dtype is not finish.dtype: - # start.dtype = finish.dtype - # for consumers in finish.output_ops.values(): - # for consumer in consumers: - # for idex, y in start.output_ops.items(): - # if not bool(set(consumers) & set(y)): - # start.remove_output(idex) - # start.add_output(idex, consumer) - # - # for indent, v in consumer.input_ops.items(): - # if v == finish: - # consumer.add_input(indent, start) - # break - else: - pass - - def _makeup_skippable(self, node: Operator) -> None: - outputs = node.output_op_list - for out_op in outputs: - for start, lst in self._aqtz_aqtz.items(): - if out_op in lst: - self._aqtz_aqtz[start].append(node) - - def _makeup_aqtz(self, node: Operator) -> None: - outputs = node.output_op_list - for out_op in outputs: - for start, lst in self._qconv_qconv.items(): - if out_op in lst: - self._qconv_qconv[start].append(node) - - def run_backward_QTZ_linear_mid_tread_half(self, node: QTZ_linear_mid_tread_half, **kwargs: Any) -> None: - self._aqtz_aqtz[node] = [node] - self._makeup_aqtz(node) - - def run_backward_by_default(self, node: Operator, **kwargs: Any) -> None: - if node.is_monotonic and not node_is_conv(node): - self._makeup_skippable(node) - self._makeup_aqtz(node) - - def run_backward_conv(self, node: Conv, **kwargs: Any) -> None: - self._makeup_skippable(node) - if node_is_qconv(node): - self._qconv_qconv[node] = [node] - - def run_forward_conv(self, node: Conv, **kwargs: Any) -> None: - bits: List[int] = [] - aqtzers: List[Quantizer] = [] - if node_is_qconv(node): - for x in self._qconv_qconv[node]: - if node_is_activation_quantizer(x): - bits.append(x.nbit) - aqtzers.append(x) - - if not (len(set(bits)) == 1): - ValueError('Values are not consistent') - else: - node.a_quantizer = aqtzers + matches: List[NodeMatch] = list() + p = Pattern('Conv') + gm.get_op_type_matches(p, matches) + + for m in matches: + conv_node = m.node + + # check if this is a quantized convolution + if not conv_node.quantizer or not conv_node.a_quantizer: + continue + + # Mark as quantized convolution + conv_node.is_quantized = True + + # change the output data type of the convolution if thresholds are available + if conv_node.has_thresholds: + conv_node.dtype = QUANTIZED_NOT_PACKED + + # change the output data type of the quantizers + conv_node.quantizer.dtype = Uint32 + for qtz in conv_node.a_quantizer: + qtz.dtype = QUANTIZED_NOT_PACKED + + +def pass_propagate_datatypes(graph) -> None: + + gm = GraphMatcher(graph) + + matches: List[NodeMatch] = list() + p = Pattern('*') + gm.get_op_type_matches(p, matches) - def run_forward_QTZ_linear_mid_tread_half(self, node: QTZ_linear_mid_tread_half, **kwargs: Any) -> None: - self._apply_threshold_skipping(self._aqtz_aqtz[node]) + for m in matches: + if m.node.op_type != 'Conv' and m.node.preserve_quantization: + m.node.dtype = m.node.input_nodes[0].dtype -class Optimizer(object): - """Class of optimization 
classes.""" +def pass_propagate_output_type_backward(graph: Graph) -> None: - def transpose_NHWC(self, graph: Graph) -> Graph: - runner = NHWC_Transposer(graph) - kwargs: Dict[str, Any] = {} - runner.run(**kwargs) - return graph + gm = GraphMatcher(graph) - def precompute(self, graph: Graph, hard_quantized: bool = False) -> Graph: - runner1 = DTypeChanger(graph) - runner2 = PreComputeRunner(graph, hard_quantized=hard_quantized) + matches: List[NodeMatch] = list() + p = Pattern('*') - kwargs: Dict[str, Set[Conv]] = {'qconv': set()} + gm.get_op_type_matches(p, matches) - # run - if hard_quantized: - runner1.run(**kwargs) - runner2.run(**kwargs) + def find_input(node, otype): + for n in node.input_nodes: + if n.op_type == 'Conv' and n.is_quantized: + n.dtype = otype + return + find_input(n, otype) - return graph + output_node = matches[-1].node - def threshold_skipping(self, graph: Graph) -> Graph: - runner1 = ApplyThresholdSkipping(graph) - kwargs: Dict[str, Any] = {} - runner1.run(**kwargs) - return graph + output_type = output_node.dtype + find_input(output_node, output_type) diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index 3b5a10709..f2cff229b 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -20,410 +20,24 @@ - Generate all cpp source headers and other control files like Makefile. """ import click +import utils from os import path -import shutil -import math -import numpy as np from core.config import Config from core.graph import Graph from core.model import Model from core.params import Params -from core.optimizer import Optimizer from code_generater import CodeGenerater from frontend import TensorFlowIO -from core.graph_pattern_matching import GraphMatcher, Pattern, match_to_execution_list -from core.operators import Constant, Operator -from modules.packer import Packer -from core.data_types import Uint32, QUANTIZED_NOT_PACKED -from typing import cast -from collections import defaultdict -import utils +from core.optimizer import pass_dot_graph, pass_remove_identities, pass_transpose, pass_precompute, \ + pass_propagate_quantization_details_into_conv, pass_compute_thresholds, pass_pack_weights, \ + pass_quantize_convolutions, pass_propagate_datatypes, pass_propagate_output_type_backward SCRITPS_DIR = path.abspath(path.dirname(__file__)) DLK_ROOT_DIR = path.abspath(path.join(SCRITPS_DIR, '..')) ROOT_DIR = path.abspath(path.join(SCRITPS_DIR, '../../..')) -def pass_dot_graph(graph: Graph, filename): - - dot_script = 'digraph {' - - code = {} - counter = 0 - for node in graph.operators: - code[node.name] = counter - counter += 1 - - for node in graph.operators: - - shape = '-' - if node.shape: - shape = 'x'.join(str(x) for x in node.shape) - shape += '(' + node.dimension + ')' - - dot_script += node.name + '[label=" ' + format(code[node.name], '04X') + '| ' + \ - node.op_type + '| ' + shape + '| ' + node.dtype.cpptype() + '" shape = "record"];' - for i in node.input_nodes: - dot_script += i.name + ' -> ' + node.name + ';' - - dot_script += '}' - - with open(filename, 'w') as f: - f.write(dot_script) - - -def pass_remove_identities(graph: Graph): - - gm = GraphMatcher(graph) - - to_be_removed = list() - matches = list() - p = Pattern("Identity") - gm.get_op_type_matches(p, matches) - - for m in matches: - """skip all identity.""" - in_op = m.node.input_ops['input'] - out_ops = m.node.output_ops['output'] - for out_op in out_ops: - for k, v in out_op.input_ops.items(): - if v 
== m.node: - # change the output's input to this identity's input - out_op.add_input(k, in_op) - # change the input's output to this identity's output - for k2, v2 in in_op.output_ops.items(): - if m.node in v2: - v2.remove(m.node) - v2.append(out_op) - break - break - - to_be_removed.append(m.node) - - for op in to_be_removed: - graph.remove_op(op) - - -def pass_transpose(graph): - - gm = GraphMatcher(graph) - - matches = list() - p = Pattern("*") - gm.get_op_type_matches(p, matches) - - for m in matches: - dim = m.node.dimension - shape = m.node.shape - if len(shape) != 4 or len(dim) != 4 or not set(dim).issubset({'N', 'H', 'W', 'C', 'I', 'O'}): - continue - - dim = dim.replace('I', 'C') - dim = dim.replace('O', 'N') - - permutation = list(map(lambda s: dim.index(s), 'NHWC')) - m.node.transpose(permutation) - - -def pass_precompute(graph, processed_nodes): - - gm = GraphMatcher(graph) - - matches = list() - p = Pattern('*') - gm.get_op_type_matches(p, matches) - - processed_before_precompute = len(processed_nodes) - - for m in matches: - if m.node in processed_nodes: - continue - - # We want operators with inputs - if not m.node.input_nodes: - continue - - precomputable = True - for input_node in m.node.input_nodes: - if input_node.op_type != 'Constant': - precomputable = False - - if not precomputable: - continue - - processed_nodes += m.node.input_nodes - processed_nodes.append(m.node) - - data = m.node.run_forward() - - new_constant = Constant( - m.node.name + '_new', - m.node.dtype, - data, - dimension_format=m.node.dimension - ) - - graph.add_op(new_constant) - - new_constant.add_outputs(m.node.output_ops) - for output_name, consumer_list in m.node.output_ops.items(): - for consumer_node in consumer_list: - for input_name, input_node in consumer_node.input_ops.items(): - if input_node == m.node: - consumer_node.add_input(input_name, new_constant) - break - - return len(processed_nodes) > processed_before_precompute - - -def pass_propagate_quantization_details_into_conv(graph): - - gm = GraphMatcher(graph) - - matches = list() - p = Pattern('*') - gm.get_op_type_matches(p, matches) - - qtypes = [ - 'QTZ_binary_mean_scaling', - 'QTZ_linear_mid_tread_half', - 'QTZ_binary_channel_wise_mean_scaling' - ] - - quant_details = defaultdict(list) - for m in matches: - if not m.node.preserve_quantization: - quant_details[m.node.name] = [] - continue - - if m.node.op_type == 'Conv': - input_node = m.node.input_nodes[0] - weight_node = m.node.input_nodes[1] - - m.node.a_quantizer = [input_node] if input_node.op_type in qtypes else quant_details[input_node.name] - m.node.quantizer = weight_node if weight_node.op_type in qtypes else quant_details[weight_node.name] - - quant_details[m.node.name] = [] - else: - qtzs = [] - for n in m.node.input_nodes: - if n.op_type in qtypes: - qtzs.append(n) - else: - for q in quant_details[n.name]: - qtzs.append(q) - - quant_details[m.node.name] = qtzs if len(qtzs) == len(m.node.input_nodes) else [] - # TODO: check if the quantizers use same n_bits - - -def pass_compute_thresholds(graph): - - gm = GraphMatcher(graph) - - matches = list() - p = Pattern('QTZ_linear_mid_tread_half') - - gm.get_op_type_matches(p, matches) - - for m in matches: - - p = [m.node] - while p[-1].op_type != 'Conv': - non_variable_input = [inode for inode in p[-1].input_nodes - if (not cast(Operator, inode).is_variable and inode.is_monotonic) - or inode.op_type == 'Conv'] - if len(non_variable_input) != 1: - break - p.append(non_variable_input[-1]) - - if p[-1].op_type != 'Conv': - continue 
- quantizer_conv_output_node = p[0] - conv_node = p[-1] - - # check if this is a quantized convolution - if not conv_node.quantizer or not conv_node.a_quantizer: - continue - - quantizer_conv_weights = conv_node.quantizer - quantizer_conv_weights.run_forward_no_scaling_factor() - scaling_factor = quantizer_conv_weights.scaling_factor - - # TODO: make '3' function on the number of bits of the number of bits - n = 2 ** 2 - 1 - ch = conv_node.channel - # assume that the threshold values will be a 13-bit signed integer - max_th_value = 2 ** 12 - 1 - - # The threshold_table is ndarray that holds the threshold values for all channels - threshold_table = np.empty([ch, n + 1], dtype=np.int32) - - # Compute threshold (t0, t1, t2) - for th_id, th_v in enumerate([0.5, 1.5, 2.5]): - init_threshold = np.full(ch, th_v, dtype=np.float64) - - # run calculation in reverse order: q -> bn -> scaling - trans_th = {'data': init_threshold} - for op in p[:-1]: - trans_th = op.de_run(**trans_th) - threshold = (trans_th['data'] * np.float64(n)) / (np.float64(2.0) * scaling_factor) - - for ch_id, th_per_ch in enumerate(threshold): - if quantizer_conv_weights.op_type == 'QTZ_binary_channel_wise_mean_scaling': - threshold_table[ch_id, th_id] = int(math.floor(th_per_ch)) \ - if (scaling_factor[ch_id] < 0) ^ (ch_id in trans_th['nega_idx']) \ - else int(math.ceil(th_per_ch)) - else: - threshold_table[ch_id, th_id] = int(math.floor(th_per_ch)) \ - if (scaling_factor < 0) ^ (ch_id in trans_th['nega_idx']) \ - else int(math.ceil(th_per_ch)) - - # take care of threshold values that are larger than 16-bit signed integer - threshold_table[abs(threshold_table) > max_th_value] = max_th_value - - for c in range(ch): - threshold_table[c, -1] = 1 \ - if np.all(threshold_table[c, 1:-1] > threshold_table[c, :-2], axis=0) else -1 - # Applying the magic number - if np.all(threshold_table[c, 1:-1] == threshold_table[c, :-2], axis=0): - threshold_table[c, -1] = 2 - - # Put the thresholds into list - conv_node.thresholds = threshold_table.flatten().tolist() - - # TODO: Neil-san, you should keep this - # Disconnect batchnorm and the quantizer - out_ops = quantizer_conv_output_node.output_ops['output'] - for output_node in out_ops: - for input_name, input_node in output_node.input_ops.items(): - if input_node == quantizer_conv_output_node: - output_node.add_input(input_name, conv_node) - - conv_node.remove_output('Y') - conv_node.add_outputs({'Y': out_ops}) - - # TODO: temporary (only for drawing better graphs) - # batch_norm_node.remove_input('X') - - -def pass_pack_weights(graph): - - gm = GraphMatcher(graph) - - quantization_types = [ - 'QTZ_binary_mean_scaling', - 'QTZ_linear_mid_tread_half', - 'QTZ_binary_channel_wise_mean_scaling' - ] - - matches = list() - p = Pattern('Conv') - - gm.get_op_type_matches(p, matches) - - # TODO: pass proper parameters - packer = Packer(1, 32) - - for m in matches: - conv_node = m.node - - # check if this is a quantized convolution - if not conv_node.quantizer or not conv_node.a_quantizer: - continue - - weight_quantizer = conv_node.quantizer - if weight_quantizer.op_type not in quantization_types: - continue - - # Quantize the weights - weight_quantizer.run_forward() - op_data = weight_quantizer.binarizer(weight_quantizer.data) - data = packer.run(op_data.astype(np.float32), weight_quantizer.dimension) - - quantized_constant = Constant( - weight_quantizer.name + '_new', - Uint32(), - data, - packed=True, - actual_shape=weight_quantizer.shape - ) - - graph.add_op(quantized_constant) - - 
quantized_constant.add_outputs(weight_quantizer.output_ops) - for output_name, consumer_list in weight_quantizer.output_ops.items(): - for consumer_node in consumer_list: - for input_name, input_node in consumer_node.input_ops.items(): - if input_node == weight_quantizer: - consumer_node.add_input(input_name, quantized_constant) - break - - -def pass_quantize_convolutions(graph): - - gm = GraphMatcher(graph) - - matches = list() - p = Pattern('Conv') - gm.get_op_type_matches(p, matches) - - for m in matches: - conv_node = m.node - - # check if this is a quantized convolution - if not conv_node.quantizer or not conv_node.a_quantizer: - continue - - # Mark as quantized convolution - conv_node.is_quantized = True - - # change the output data type of the convolution if thresholds are available - if conv_node.has_thresholds: - conv_node.dtype = QUANTIZED_NOT_PACKED - - # change the output data type of the quantizers - conv_node.quantizer.dtype = Uint32 - for qtz in conv_node.a_quantizer: - qtz.dtype = QUANTIZED_NOT_PACKED - - -def pass_propagate_datatypes(graph): - - gm = GraphMatcher(graph) - - matches = list() - p = Pattern('*') - gm.get_op_type_matches(p, matches) - - for m in matches: - if m.node.op_type != 'Conv' and m.node.preserve_quantization: - m.node.dtype = m.node.input_nodes[0].dtype - - -def pass_propagate_output_type_backward(graph): - - gm = GraphMatcher(graph) - - matches = list() - p = Pattern('*') - - gm.get_op_type_matches(p, matches) - - def find_input(node, otype): - for n in node.input_nodes: - if n.op_type == 'Conv' and n.is_quantized: - n.dtype = otype - return - find_input(n, otype) - - output_node = matches[-1].node - - output_type = output_node.dtype - find_input(output_node, output_type) - - def optimize_graph_step(model: Model, config: Config) -> None: """Optimze graph in the model. 
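For orientation, after this series optimize_graph_step becomes a flat sequence of pass functions instead of a GraphRunner-based Optimizer. The sketch below is assembled from the hunks that follow; the exact position of pass_propagate_datatypes is not visible in the context lines and is assumed.

    def optimize_graph_step(model: Model, config: Config) -> None:
        graph: Graph = model.graph

        pass_remove_identities(graph)
        pass_transpose(graph)

        if config.activate_hard_quantization:
            pass_propagate_quantization_details_into_conv(graph)
            if config.threshold_skipping:
                pass_compute_thresholds(graph)
            pass_pack_weights(graph)
            pass_quantize_convolutions(graph)

        pass_propagate_datatypes(graph)  # position assumed, not shown in the hunks

        processed_nodes = []
        while pass_precompute(graph, processed_nodes=processed_nodes):
            pass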
@@ -437,20 +51,13 @@ def optimize_graph_step(model: Model, config: Config) -> None: """ graph: Graph = model.graph - - pass_dot_graph(graph, '/tmp/original.dot') - pass_remove_identities(graph) - pass_dot_graph(graph, '/tmp/prune_identities.dot') - pass_transpose(graph) - pass_dot_graph(graph, '/tmp/transposed.dot') if config.activate_hard_quantization: pass_propagate_quantization_details_into_conv(graph) if config.threshold_skipping: pass_compute_thresholds(graph) - # pass_propagate_output_type_backward(graph) pass_pack_weights(graph) pass_quantize_convolutions(graph) @@ -461,13 +68,6 @@ def optimize_graph_step(model: Model, config: Config) -> None: processed_nodes = [] while pass_precompute(graph, processed_nodes=processed_nodes): pass - pass_dot_graph(graph, '/tmp/final.dot') - - optim = Optimizer() - # optim.transpose_NHWC(graph) - # optim.precompute(graph, config.activate_hard_quantization) - # if config.threshold_skipping: - # optim.threshold_skipping(graph) def generate_code_step(model: Model, config: Config) -> None: From 0a36c2b865a8ed05a61474357177c9d0944be64b Mon Sep 17 00:00:00 2001 From: Antonio Date: Thu, 13 Dec 2018 09:54:48 +0900 Subject: [PATCH 16/45] Remove visited properties --- dlk/python/dlk/core/graph_pattern_matching.py | 28 +++++---- dlk/python/dlk/core/operators.py | 8 --- dlk/python/dlk/core/optimizer.py | 57 +++++++------------ dlk/python/dlk/scripts/generate_project.py | 2 +- 4 files changed, 37 insertions(+), 58 deletions(-) diff --git a/dlk/python/dlk/core/graph_pattern_matching.py b/dlk/python/dlk/core/graph_pattern_matching.py index 8ae3840a2..35f0a76a1 100644 --- a/dlk/python/dlk/core/graph_pattern_matching.py +++ b/dlk/python/dlk/core/graph_pattern_matching.py @@ -31,10 +31,8 @@ def __init__(self): self.inputs = list() -def sort_graph(graph, exec_list): - for node in graph.operators: - node.visited = False - +def sort_graph(graph): + exec_list = list() input_nodes = list() for node in graph.operators: input_nodes += [n.name for n in node.input_nodes] @@ -44,18 +42,24 @@ def sort_graph(graph, exec_list): if node not in input_nodes: output_nodes.append(node) + visited = {} + for node in graph.operators: + visited[node.name] = False + for node in output_nodes: - top_order(node, exec_list) + top_order(node, exec_list, visited) + + return exec_list -def top_order(output_node, exec_list): - if output_node.visited: +def top_order(output_node, exec_list, visited): + if visited[output_node.name]: return for input_node in output_node.input_nodes: - top_order(input_node, exec_list) + top_order(input_node, exec_list, visited) exec_list.append(output_node) - output_node.visited = True + visited[output_node.name] = True def match_to_execution_list(match, execution_list): @@ -67,7 +71,7 @@ def match_to_execution_list(match, execution_list): class GraphMatcher: def __init__(self, input_graph=Graph()): self.graph_node_list = list() - sort_graph(input_graph, self.graph_node_list) + self.graph_node_list = sort_graph(input_graph) self._node_map = {node.name: node for node in self.graph_node_list} @@ -76,7 +80,8 @@ def record_matched_nodes(self, match, matched_nodes): for input_node in match.inputs: self.record_matched_nodes(input_node, matched_nodes) - def get_op_type_matches(self, pattern, matches): + def get_op_type_matches(self, pattern): + matches = list() matched_nodes = set() for node in self.graph_node_list: if node in matched_nodes: @@ -86,6 +91,7 @@ def get_op_type_matches(self, pattern, matches): if self.does_op_type_match(node, pattern, matched_nodes, match): 
self.record_matched_nodes(match, matched_nodes) matches.append(match) + return matches def does_op_type_match(self, node, pattern, previously_matched_nodes, match): if node.name in previously_matched_nodes: diff --git a/dlk/python/dlk/core/operators.py b/dlk/python/dlk/core/operators.py index a8b234ac8..19ae9b863 100644 --- a/dlk/python/dlk/core/operators.py +++ b/dlk/python/dlk/core/operators.py @@ -560,14 +560,6 @@ def infer_shape(cls, lists: Dict[str, List[int]], format: str, input_formats: Li """ raise NotImplementedError(f'operator {cls.__name__} cannot infer its shape.') - @property - def visited(self) -> bool: - return self._visited - - @visited.setter - def visited(self, v: Bool) -> None: - self._visited = v - @property def preserve_quantization(self) -> bool: return False diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index c7a869cf5..70d4087a5 100644 --- a/dlk/python/dlk/core/optimizer.py +++ b/dlk/python/dlk/core/optimizer.py @@ -17,10 +17,8 @@ import math import numpy as np -from typing import Any, Dict, List, Optional, Set, cast - from core.graph import Graph -from core.graph_pattern_matching import GraphMatcher, Pattern, match_to_execution_list, NodeMatch +from core.graph_pattern_matching import GraphMatcher, Pattern from core.operators import Constant, Operator from core.data_types import Uint32, QUANTIZED_NOT_PACKED from typing import cast @@ -59,11 +57,10 @@ def pass_dot_graph(graph: Graph, filename) -> None: def pass_remove_identities(graph: Graph) -> None: gm = GraphMatcher(graph) + p = Pattern("Identity") + matches = gm.get_op_type_matches(p) to_be_removed = list() - matches: List[NodeMatch] = list() - p = Pattern("Identity") - gm.get_op_type_matches(p, matches) for m in matches: """skip all identity.""" @@ -91,10 +88,8 @@ def pass_remove_identities(graph: Graph) -> None: def pass_transpose(graph: Graph) -> None: gm = GraphMatcher(graph) - - matches: List[NodeMatch] = list() p = Pattern("*") - gm.get_op_type_matches(p, matches) + matches = gm.get_op_type_matches(p) for m in matches: dim = m.node.dimension @@ -112,10 +107,8 @@ def pass_transpose(graph: Graph) -> None: def pass_precompute(graph: Graph, processed_nodes) -> bool: gm = GraphMatcher(graph) - - matches: List[NodeMatch] = list() p = Pattern('*') - gm.get_op_type_matches(p, matches) + matches = gm.get_op_type_matches(p) processed_before_precompute = len(processed_nodes) @@ -162,10 +155,8 @@ def pass_precompute(graph: Graph, processed_nodes) -> bool: def pass_propagate_quantization_details_into_conv(graph: Graph) -> None: gm = GraphMatcher(graph) - - matches: List[NodeMatch] = list() p = Pattern('*') - gm.get_op_type_matches(p, matches) + matches = gm.get_op_type_matches(p) qtypes = [ 'QTZ_binary_mean_scaling', @@ -203,11 +194,8 @@ def pass_propagate_quantization_details_into_conv(graph: Graph) -> None: def pass_compute_thresholds(graph: Graph) -> None: gm = GraphMatcher(graph) - - matches: List[NodeMatch] = list() p = Pattern('QTZ_linear_mid_tread_half') - - gm.get_op_type_matches(p, matches) + matches = gm.get_op_type_matches(p) for m in matches: @@ -301,6 +289,8 @@ def pass_compute_thresholds(graph: Graph) -> None: def pass_pack_weights(graph: Graph) -> None: gm = GraphMatcher(graph) + p = Pattern('Conv') + matches = gm.get_op_type_matches(p) quantization_types = [ 'QTZ_binary_mean_scaling', @@ -308,13 +298,9 @@ def pass_pack_weights(graph: Graph) -> None: 'QTZ_binary_channel_wise_mean_scaling' ] - matches: List[NodeMatch] = list() - p = Pattern('Conv') - - 
gm.get_op_type_matches(p, matches) - - # TODO: pass proper parameters - packer = Packer(1, 32) + word_size = 32 + weight_bitwidth = 1 + packer = Packer(weight_bitwidth, word_size) for m in matches: conv_node = m.node @@ -323,6 +309,7 @@ def pass_pack_weights(graph: Graph) -> None: if not conv_node.quantizer or not conv_node.a_quantizer: continue + # Check if we support this kind of quantizer weight_quantizer = conv_node.quantizer if weight_quantizer.op_type not in quantization_types: continue @@ -332,6 +319,7 @@ def pass_pack_weights(graph: Graph) -> None: op_data = weight_quantizer.binarizer(weight_quantizer.data) data = packer.run(op_data.astype(np.float32), weight_quantizer.dimension) + # Create the new constant with the quantized weights quantized_constant = Constant( weight_quantizer.name + '_new', Uint32(), @@ -340,8 +328,8 @@ def pass_pack_weights(graph: Graph) -> None: actual_shape=weight_quantizer.shape ) + # Add the constant to the graph and connect the new constant graph.add_op(quantized_constant) - quantized_constant.add_outputs(weight_quantizer.output_ops) for output_name, consumer_list in weight_quantizer.output_ops.items(): for consumer_node in consumer_list: @@ -354,10 +342,8 @@ def pass_pack_weights(graph: Graph) -> None: def pass_quantize_convolutions(graph: Graph) -> None: gm = GraphMatcher(graph) - - matches: List[NodeMatch] = list() p = Pattern('Conv') - gm.get_op_type_matches(p, matches) + matches = gm.get_op_type_matches(p) for m in matches: conv_node = m.node @@ -382,10 +368,8 @@ def pass_quantize_convolutions(graph: Graph) -> None: def pass_propagate_datatypes(graph) -> None: gm = GraphMatcher(graph) - - matches: List[NodeMatch] = list() p = Pattern('*') - gm.get_op_type_matches(p, matches) + matches = gm.get_op_type_matches(p) for m in matches: if m.node.op_type != 'Conv' and m.node.preserve_quantization: @@ -395,11 +379,8 @@ def pass_propagate_datatypes(graph) -> None: def pass_propagate_output_type_backward(graph: Graph) -> None: gm = GraphMatcher(graph) - - matches: List[NodeMatch] = list() p = Pattern('*') - - gm.get_op_type_matches(p, matches) + matches = gm.get_op_type_matches(p) def find_input(node, otype): for n in node.input_nodes: @@ -408,7 +389,7 @@ def find_input(node, otype): return find_input(n, otype) + # propagate output data type to the last quantized convolution output_node = matches[-1].node - output_type = output_node.dtype find_input(output_node, output_type) diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index f2cff229b..f29ad24ed 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -29,7 +29,7 @@ from core.params import Params from code_generater import CodeGenerater from frontend import TensorFlowIO -from core.optimizer import pass_dot_graph, pass_remove_identities, pass_transpose, pass_precompute, \ +from core.optimizer import pass_remove_identities, pass_transpose, pass_precompute, \ pass_propagate_quantization_details_into_conv, pass_compute_thresholds, pass_pack_weights, \ pass_quantize_convolutions, pass_propagate_datatypes, pass_propagate_output_type_backward From 925044ee6d132cf2847dbf2098f591e868b40f2a Mon Sep 17 00:00:00 2001 From: Antonio Date: Thu, 13 Dec 2018 10:01:01 +0900 Subject: [PATCH 17/45] Make find patterns easier --- dlk/python/dlk/core/graph_pattern_matching.py | 5 +++ dlk/python/dlk/core/optimizer.py | 42 +++++-------------- 2 files changed, 15 insertions(+), 32 deletions(-) diff --git 
a/dlk/python/dlk/core/graph_pattern_matching.py b/dlk/python/dlk/core/graph_pattern_matching.py index 35f0a76a1..dd003581c 100644 --- a/dlk/python/dlk/core/graph_pattern_matching.py +++ b/dlk/python/dlk/core/graph_pattern_matching.py @@ -31,6 +31,11 @@ def __init__(self): self.inputs = list() +def find_pattern(graph, pattern): + gm = GraphMatcher(graph) + return gm.get_op_type_matches(pattern) + + def sort_graph(graph): exec_list = list() input_nodes = list() diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index 70d4087a5..f2d488ab3 100644 --- a/dlk/python/dlk/core/optimizer.py +++ b/dlk/python/dlk/core/optimizer.py @@ -18,7 +18,7 @@ import numpy as np from core.graph import Graph -from core.graph_pattern_matching import GraphMatcher, Pattern +from core.graph_pattern_matching import find_pattern, Pattern from core.operators import Constant, Operator from core.data_types import Uint32, QUANTIZED_NOT_PACKED from typing import cast @@ -55,11 +55,8 @@ def pass_dot_graph(graph: Graph, filename) -> None: def pass_remove_identities(graph: Graph) -> None: - - gm = GraphMatcher(graph) p = Pattern("Identity") - matches = gm.get_op_type_matches(p) - + matches = find_pattern(graph, p) to_be_removed = list() for m in matches: @@ -86,10 +83,8 @@ def pass_remove_identities(graph: Graph) -> None: def pass_transpose(graph: Graph) -> None: - - gm = GraphMatcher(graph) p = Pattern("*") - matches = gm.get_op_type_matches(p) + matches = find_pattern(graph, p) for m in matches: dim = m.node.dimension @@ -105,11 +100,8 @@ def pass_transpose(graph: Graph) -> None: def pass_precompute(graph: Graph, processed_nodes) -> bool: - - gm = GraphMatcher(graph) p = Pattern('*') - matches = gm.get_op_type_matches(p) - + matches = find_pattern(graph, p) processed_before_precompute = len(processed_nodes) for m in matches: @@ -153,11 +145,8 @@ def pass_precompute(graph: Graph, processed_nodes) -> bool: def pass_propagate_quantization_details_into_conv(graph: Graph) -> None: - - gm = GraphMatcher(graph) p = Pattern('*') - matches = gm.get_op_type_matches(p) - + matches = find_pattern(graph, p) qtypes = [ 'QTZ_binary_mean_scaling', 'QTZ_linear_mid_tread_half', @@ -192,10 +181,8 @@ def pass_propagate_quantization_details_into_conv(graph: Graph) -> None: def pass_compute_thresholds(graph: Graph) -> None: - - gm = GraphMatcher(graph) p = Pattern('QTZ_linear_mid_tread_half') - matches = gm.get_op_type_matches(p) + matches = find_pattern(graph, p) for m in matches: @@ -287,11 +274,8 @@ def pass_compute_thresholds(graph: Graph) -> None: def pass_pack_weights(graph: Graph) -> None: - - gm = GraphMatcher(graph) p = Pattern('Conv') - matches = gm.get_op_type_matches(p) - + matches = find_pattern(graph, p) quantization_types = [ 'QTZ_binary_mean_scaling', 'QTZ_linear_mid_tread_half', @@ -340,10 +324,8 @@ def pass_pack_weights(graph: Graph) -> None: def pass_quantize_convolutions(graph: Graph) -> None: - - gm = GraphMatcher(graph) p = Pattern('Conv') - matches = gm.get_op_type_matches(p) + matches = find_pattern(graph, p) for m in matches: conv_node = m.node @@ -366,10 +348,8 @@ def pass_quantize_convolutions(graph: Graph) -> None: def pass_propagate_datatypes(graph) -> None: - - gm = GraphMatcher(graph) p = Pattern('*') - matches = gm.get_op_type_matches(p) + matches = find_pattern(graph, p) for m in matches: if m.node.op_type != 'Conv' and m.node.preserve_quantization: @@ -377,10 +357,8 @@ def pass_propagate_datatypes(graph) -> None: def pass_propagate_output_type_backward(graph: Graph) -> None: - - gm = 
GraphMatcher(graph) p = Pattern('*') - matches = gm.get_op_type_matches(p) + matches = find_pattern(graph, p) def find_input(node, otype): for n in node.input_nodes: From 33754eab5c6fc8a516483a2f9ec00d3985e3fd89 Mon Sep 17 00:00:00 2001 From: Antonio Date: Thu, 13 Dec 2018 13:15:37 +0900 Subject: [PATCH 18/45] Delete disconnected nodes from graph and stop using graph runner on code generation --- dlk/python/dlk/core/graph.py | 9 +++--- dlk/python/dlk/core/graph_pattern_matching.py | 14 +++++++--- dlk/python/dlk/core/optimizer.py | 28 ++++++++++++++++--- dlk/python/dlk/scripts/generate_project.py | 2 +- 4 files changed, 40 insertions(+), 13 deletions(-) diff --git a/dlk/python/dlk/core/graph.py b/dlk/python/dlk/core/graph.py index 0ff8fc0c4..b659f91e1 100644 --- a/dlk/python/dlk/core/graph.py +++ b/dlk/python/dlk/core/graph.py @@ -23,6 +23,8 @@ Relu, Flatten, Dropout, Gemm, SpaceToDepth, Mul, QTZ_binary_channel_wise_mean_scaling, ConcatOnDepth, Maximum, \ DepthToSpace, Split +from core.graph_pattern_matching import sort_graph, find_pattern, Pattern + class Graph(object): """Graph class. This class was formerly named as 'Nodes'.""" @@ -109,10 +111,9 @@ def consts(self) -> List[Operator]: @property def non_variables(self) -> List[Operator]: - kwargs: Dict[str, List[Operator]] = {'node_list': []} - sorter = NodesSorter(self) - sorter.run(**kwargs) - return [node for node in kwargs['node_list'] if not cast(Operator, node).is_variable] + node_list = sort_graph(self) + node_list = [node for node in node_list if not cast(Operator, node).is_variable] + return node_list def find_node_by_op_type(self, op_type: str) -> List[Operator]: """Find nodes which op_type is specified by the argument. diff --git a/dlk/python/dlk/core/graph_pattern_matching.py b/dlk/python/dlk/core/graph_pattern_matching.py index dd003581c..95862d260 100644 --- a/dlk/python/dlk/core/graph_pattern_matching.py +++ b/dlk/python/dlk/core/graph_pattern_matching.py @@ -15,9 +15,6 @@ # ============================================================================= """Graph pattern matching module.""" -from core.operators import Operator -from core.graph import Graph - class Pattern: def __init__(self, op=str(), inputs=list()): @@ -67,6 +64,15 @@ def top_order(output_node, exec_list, visited): visited[output_node.name] = True +def get_nodes_in_branch(starting_node, stop_node, node_list): + if starting_node == stop_node: + return + node_list.append(starting_node) + + for node in starting_node.input_nodes: + get_nodes_in_branch(node, stop_node, node_list) + + def match_to_execution_list(match, execution_list): for input_node in match.inputs: match_to_execution_list(input_node, execution_list) @@ -74,7 +80,7 @@ def match_to_execution_list(match, execution_list): class GraphMatcher: - def __init__(self, input_graph=Graph()): + def __init__(self, input_graph): self.graph_node_list = list() self.graph_node_list = sort_graph(input_graph) diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index f2d488ab3..ec259e2c9 100644 --- a/dlk/python/dlk/core/optimizer.py +++ b/dlk/python/dlk/core/optimizer.py @@ -18,7 +18,7 @@ import numpy as np from core.graph import Graph -from core.graph_pattern_matching import find_pattern, Pattern +from core.graph_pattern_matching import find_pattern, Pattern, get_nodes_in_branch from core.operators import Constant, Operator from core.data_types import Uint32, QUANTIZED_NOT_PACKED from typing import cast @@ -103,6 +103,7 @@ def pass_precompute(graph: Graph, processed_nodes) -> bool: p = 
Pattern('*') matches = find_pattern(graph, p) processed_before_precompute = len(processed_nodes) + to_be_removed = [] for m in matches: if m.node in processed_nodes: @@ -131,9 +132,11 @@ def pass_precompute(graph: Graph, processed_nodes) -> bool: data, dimension_format=m.node.dimension ) - graph.add_op(new_constant) + # get nodes to be removed after being disconnected + get_nodes_in_branch(m.node, None, to_be_removed) + new_constant.add_outputs(m.node.output_ops) for output_name, consumer_list in m.node.output_ops.items(): for consumer_node in consumer_list: @@ -141,6 +144,10 @@ def pass_precompute(graph: Graph, processed_nodes) -> bool: if input_node == m.node: consumer_node.add_input(input_name, new_constant) break + + for op in to_be_removed: + graph.remove_op(op) + return len(processed_nodes) > processed_before_precompute @@ -184,8 +191,8 @@ def pass_compute_thresholds(graph: Graph) -> None: p = Pattern('QTZ_linear_mid_tread_half') matches = find_pattern(graph, p) + to_be_removed = [] for m in matches: - p = [m.node] while p[-1].op_type != 'Conv': non_variable_input = [inode for inode in p[-1].input_nodes @@ -262,16 +269,23 @@ def pass_compute_thresholds(graph: Graph) -> None: # Put the thresholds into list conv_node.thresholds = threshold_table.flatten().tolist() - # Disconnect batchnorm and the quantizer + # get nodes to be removed after being disconnected + get_nodes_in_branch(quantizer_conv_output_node, conv_node, to_be_removed) + + # Disconnect the outputs of the quantizer out_ops = quantizer_conv_output_node.output_ops['output'] for output_node in out_ops: for input_name, input_node in output_node.input_ops.items(): if input_node == quantizer_conv_output_node: output_node.add_input(input_name, conv_node) + # Disconnect the outputs of the conv conv_node.remove_output('Y') conv_node.add_outputs({'Y': out_ops}) + for op in to_be_removed: + graph.remove_op(op) + def pass_pack_weights(graph: Graph) -> None: p = Pattern('Conv') @@ -285,6 +299,7 @@ def pass_pack_weights(graph: Graph) -> None: word_size = 32 weight_bitwidth = 1 packer = Packer(weight_bitwidth, word_size) + to_be_removed = [] for m in matches: conv_node = m.node @@ -312,6 +327,9 @@ def pass_pack_weights(graph: Graph) -> None: actual_shape=weight_quantizer.shape ) + # get nodes to be removed after being disconnected + get_nodes_in_branch(weight_quantizer, None, to_be_removed) + # Add the constant to the graph and connect the new constant graph.add_op(quantized_constant) quantized_constant.add_outputs(weight_quantizer.output_ops) @@ -322,6 +340,8 @@ def pass_pack_weights(graph: Graph) -> None: consumer_node.add_input(input_name, quantized_constant) break + for op in to_be_removed: + graph.remove_op(op) def pass_quantize_convolutions(graph: Graph) -> None: p = Pattern('Conv') diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index f29ad24ed..8bd0a2d63 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -31,7 +31,7 @@ from frontend import TensorFlowIO from core.optimizer import pass_remove_identities, pass_transpose, pass_precompute, \ pass_propagate_quantization_details_into_conv, pass_compute_thresholds, pass_pack_weights, \ - pass_quantize_convolutions, pass_propagate_datatypes, pass_propagate_output_type_backward + pass_quantize_convolutions, pass_propagate_datatypes, pass_propagate_output_type_backward, pass_dot_graph SCRITPS_DIR = path.abspath(path.dirname(__file__)) DLK_ROOT_DIR = 
path.abspath(path.join(SCRITPS_DIR, '..')) From 93013ede4741e0ec4dca48d37c30ee83984daa72 Mon Sep 17 00:00:00 2001 From: Antonio Date: Thu, 13 Dec 2018 14:18:55 +0900 Subject: [PATCH 19/45] Deleted old code --- dlk/python/dlk/core/graph.py | 403 -------------------------- dlk/python/dlk/core/operators.py | 73 ----- dlk/python/dlk/frontend/tensorflow.py | 9 +- dlk/python/dlk/frontend/tf_export.py | 359 ----------------------- 4 files changed, 1 insertion(+), 843 deletions(-) delete mode 100644 dlk/python/dlk/frontend/tf_export.py diff --git a/dlk/python/dlk/core/graph.py b/dlk/python/dlk/core/graph.py index b659f91e1..6c569b041 100644 --- a/dlk/python/dlk/core/graph.py +++ b/dlk/python/dlk/core/graph.py @@ -167,406 +167,3 @@ def check_nodes(self) -> bool: return True - def accept(self, runner: 'GraphRunner', **kwargs: Any) -> None: - """Accept a graph runner and run it from the output node.""" - if TYPE_CHECKING: - import core.graph as gp - runner.initialize(**kwargs) - - if runner.depth_first: # depth first traversal - outputs = self.get_outputs() - for out in outputs: - out.accept(cast('gp.GraphRunner', runner), **kwargs) - - else: # breadth first traversal - # backward 1st - next = self.get_outputs() - if runner.is_lazy: - while next: - def get_visit_list(ops: List[Operator]) -> List[bool]: - return list(map(lambda n: runner.is_visited(cast(Operator, n)), ops)) - - def and_all(list: List[bool]) -> bool: - return functools.reduce(lambda x, y: x and y, list, True) - - # devide the `next` list into executables and non-executables - execs = [op for op in next if and_all(get_visit_list(op.output_op_list))] - non_execs = [op for op in next if not and_all(get_visit_list(op.output_op_list))] - - # if there is no executable operators, terminate this loop - if execs == []: - names = list(map(lambda x: x.name, non_execs)) - raise AssertionError(f'dead lock happened. {names} cannot run.') - - # execute - next = non_execs - for op in execs: - next += op.accept_backward(cast('gp.GraphRunner', runner), **kwargs) - else: - for op in next: - next += op.accept_backward(cast('gp.GraphRunner', runner), **kwargs) - - # turn - runner.turn(**kwargs) - - # forward run - next = self.get_inputs() + self.consts - if runner.is_lazy: - while next: - def get_inputs(op: Operator) -> List[Operator]: - return list(op.input_ops.values()) - - def get_visit_list(ops: List[Operator]) -> List[bool]: - return list(map(lambda n: not runner.is_visited(cast(Operator, n)), ops)) - - def and_all(list: List[bool]) -> bool: - return functools.reduce(lambda x, y: x and y, list, True) - - # devide the `next` list into executables and non-executables - execs = [op for op in next if and_all(get_visit_list(get_inputs(op)))] - non_execs = [op for op in next if not and_all(get_visit_list(get_inputs(op)))] - - # if there is no executable operators, terminate this loop - if execs == []: - names = list(map(lambda x: x.name, non_execs)) - raise AssertionError(f'dead lock happened. {names} cannot run.') - - # execute - next = non_execs - for op in execs: - next += op.accept_forward(cast('gp.GraphRunner', runner), **kwargs) - else: - for op in next: - next += op.accept_forward(cast('gp.GraphRunner', runner), **kwargs) - - runner.finalize(**kwargs) - - -class GraphRunner(object): - """Visitor class of a graph.""" - - def __init__(self, graph: Graph, depth_first: bool = True, lazy: bool = True) -> None: - """Set up the graph runner. - - Parameters - ---------- - graph : Graph - the graph to be traversed. 
- - depth_first : bool - a flag that represents if the running is done in a depth first manner. - Otherwise, this runner runs in a breadth first manner. It defaults to - True, i.e. a depth first traversal. - - lazy : bool - True if this runner runs in a lazy mode. This means all operator waits - for the traversal until the predecessors are traversed. - This flag is valid only in breadth-first mode. In the depth-first mode, - this is naturally true. - """ - self._graph = graph - self._visited: Set[str] = set() - self._dfs = depth_first - self._is_lazy = lazy - - def run(self, **kwargs: Any) -> None: - """Run this runner on the graph.""" - self._graph.accept(self, **kwargs) - - @property - def visited(self) -> Set[str]: - return set(self._visited) - - def visit(self, op: Operator) -> None: - self._visited.add(op.name) - - def unvisit(self, op: Operator) -> None: - self._visited.remove(op.name) - - def is_visited(self, node: Operator) -> bool: - return node.name in self._visited - - @property - def depth_first(self) -> bool: - """Returns True if this runs in a depth-first manner. - - Otherwise, this runs in a breadth-first manner. - """ - return self._dfs - - @property - def is_lazy(self) -> bool: - """Returns True if this runs in a lazy mode, i.e. all node waits until all of its predecessors are traversed. - - This flag is valide only in the breadth-first mode. - """ - return self._is_lazy - - def initialize(self, **kwargs: Any) -> None: - """Initialize the running. - - This method is called when the run starts. - """ - pass - - def turn(self, **kwargs: Any) -> None: - """Turn from backward to forward. - - This method is called only when the run is in a breadth-first manner. - """ - pass - - def finalize(self, **kwargs: Any) -> None: - """Finalize the running. - - This method is called when the run finishes. 
- """ - pass - - def run_backward_by_default(self, node: Operator, **kwargs: Any) -> None: - pass - - def run_forward_by_default(self, node: Operator, **kwargs: Any) -> None: - pass - - def run_backward_input(self, node: Input, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_input(self, node: Input, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_constant(self, node: Constant, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_constant(self, node: Constant, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_output(self, node: Output, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_output(self, node: Output, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_identity(self, node: Identity, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_identity(self, node: Identity, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_QTZ_binary_mean_scaling(self, node: QTZ_binary_mean_scaling, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_QTZ_binary_mean_scaling(self, node: QTZ_binary_mean_scaling, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_transpose(self, node: Transpose, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_transpose(self, node: Transpose, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_conv(self, node: Conv, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_conv(self, node: Conv, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_batch_normalization(self, node: BatchNormalization, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_batch_normalization(self, node: BatchNormalization, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_QTZ_linear_mid_tread_half(self, node: QTZ_linear_mid_tread_half, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_QTZ_linear_mid_tread_half(self, node: QTZ_linear_mid_tread_half, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_add(self, node: Add, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_add(self, node: Add, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_max_pool(self, node: MaxPool, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_max_pool(self, node: MaxPool, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_average_pool(self, node: AveragePool, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_average_pool(self, node: AveragePool, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_reshape(self, node: Reshape, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_reshape(self, node: Reshape, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_softmax(self, node: Softmax, **kwargs: Any) -> None: 
- self.run_backward_by_default(node, **kwargs) - - def run_forward_softmax(self, node: Softmax, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_relu(self, node: Relu, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_relu(self, node: Relu, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_flatten(self, node: Flatten, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_flatten(self, node: Flatten, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_dropout(self, node: Dropout, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_dropout(self, node: Dropout, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_gemm(self, node: Gemm, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_gemm(self, node: Gemm, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_SpaceToDepth(self, node: SpaceToDepth, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_SpaceToDepth(self, node: SpaceToDepth, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_mul(self, node: Mul, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_mul(self, node: Mul, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_QTZ_binary_channel_wise_mean_scaling( - self, - node: QTZ_binary_channel_wise_mean_scaling, - **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_QTZ_binary_channel_wise_mean_scaling( - self, - node: QTZ_binary_channel_wise_mean_scaling, - **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_ConcatOnDepth(self, node: ConcatOnDepth, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_ConcatOnDepth(self, node: ConcatOnDepth, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_Maximum(self, node: Maximum, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_Maximum(self, node: Maximum, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_DepthToSpace(self, node: DepthToSpace, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_DepthToSpace(self, node: DepthToSpace, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - def run_backward_Split(self, node: Split, **kwargs: Any) -> None: - self.run_backward_by_default(node, **kwargs) - - def run_forward_Split(self, node: Split, **kwargs: Any) -> None: - self.run_forward_by_default(node, **kwargs) - - -class NodesSorter(GraphRunner): - """Class for sorting the nodes of a graph - - It will sort the nodes of a graph in topological order - """ - - def run_forward_input(self, node: Input, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_constant(self, node: Constant, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_output(self, node: Output, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_identity(self, node: Identity, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def 
run_forward_QTZ_binary_mean_scaling(self, node: QTZ_binary_mean_scaling, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_transpose(self, node: Transpose, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_conv(self, node: Conv, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_batch_normalization(self, node: BatchNormalization, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_QTZ_linear_mid_tread_half(self, node: QTZ_linear_mid_tread_half, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_add(self, node: Add, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_max_pool(self, node: MaxPool, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_average_pool(self, node: AveragePool, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_reshape(self, node: Reshape, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_softmax(self, node: Softmax, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_relu(self, node: Relu, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_flatten(self, node: Flatten, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_dropout(self, node: Dropout, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_gemm(self, node: Gemm, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_SpaceToDepth(self, node: SpaceToDepth, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_mul(self, node: Mul, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_QTZ_binary_channel_wise_mean_scaling( - self, - node: QTZ_binary_channel_wise_mean_scaling, - **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_ConcatOnDepth(self, node: ConcatOnDepth, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_Maximum(self, node: Maximum, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_DepthToSpace(self, node: DepthToSpace, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - - def run_forward_Split(self, node: Split, **kwargs: Any) -> None: - kwargs['node_list'].append(node) - diff --git a/dlk/python/dlk/core/operators.py b/dlk/python/dlk/core/operators.py index 19ae9b863..9f059fb35 100644 --- a/dlk/python/dlk/core/operators.py +++ b/dlk/python/dlk/core/operators.py @@ -25,7 +25,6 @@ from .data_types import * if TYPE_CHECKING: - from core.graph import GraphRunner import core.operators as ops Ops = Dict[str, 'Operator'] @@ -475,82 +474,10 @@ def run_forward(self) -> np.ndarray: raise NotImplementedError( f'operator {self.op_type} does not have runtime implemenatation yet.') - def accept(self, runner: 'GraphRunner', **kwargs: Any) -> None: - """Accept the graph runner and dispatch. - - This should not be accessed directly, but be called inside. - A bit conplicated use of the visitor pattern. - - runner : GraphRunner - Runner that runs through the graph from outputs to inputs (go backward), - then runs again from inputs to outputs (go forward). - - **kwargs : Any - Any keyward arguments that can be referred and updated during the run. 
- - """ - if runner.is_visited(cast('ops.Operator', self)): - return - - # run backward - self._dispatch_backward(runner, **kwargs) - - # go inside the inputs - for i in self.input_ops.values(): - i.accept(runner, **kwargs) - - # run forward - self._dispatch_forward(runner, **kwargs) - - # record visit - runner.visit(cast('ops.Operator', self)) - - def accept_backward(self, runner: 'GraphRunner', **kwargs: Any) -> List['Operator']: - """Accept the graph runner and dispatch for backward traversal, in a breadth-first .""" - if runner.is_visited(cast('ops.Operator', self)): - return [] - - # run backward - self._dispatch_backward(runner, **kwargs) - - # record visit - runner.visit(cast('ops.Operator', self)) - - # return its inputs as next accepters - return list(self._input_ops.values()) - - def accept_forward(self, runner: 'GraphRunner', **kwargs: Any) -> List['Operator']: - """Accept the graph runner and dispatch for forward traversal, in a breadth-first .""" - - # Note that all 'is_visited' flag is inverted, as this is already used in the backward run - if not runner.is_visited(cast('ops.Operator', self)): - return [] - - # run forward - self._dispatch_forward(runner, **kwargs) - - # record (un)visit - runner.unvisit(cast('ops.Operator', self)) - - # return its outputs as next accepters - return self.output_op_list - @property def _dispatch_name(self) -> str: return type(self).__name__.lower() - def _dispatch_backward(self, runner: 'GraphRunner', **kwargs: Any) -> None: - """Dispatch `runner.run_backward_xxx()` inside.""" - method_name = 'run_backward_' + self._dispatch_name - method_body = getattr(runner, method_name) - method_body(self, **kwargs) - - def _dispatch_forward(self, runner: 'GraphRunner', **kwargs: Any) -> None: - """Dispatch `runner.run_forward_xxx()` inside.""" - method_name = 'run_forward_' + self._dispatch_name - method_body = getattr(runner, method_name) - method_body(self, **kwargs) - @classmethod def infer_shape(cls, lists: Dict[str, List[int]], format: str, input_formats: List[str], attrs: Dict[str, Any]) -> List[int]: diff --git a/dlk/python/dlk/frontend/tensorflow.py b/dlk/python/dlk/frontend/tensorflow.py index 9ffef7d84..7a947b63f 100644 --- a/dlk/python/dlk/frontend/tensorflow.py +++ b/dlk/python/dlk/frontend/tensorflow.py @@ -17,11 +17,7 @@ from .base import BaseIO from core.model import Model from plugins.tf import Importer -import tensorflow as tf from tensorflow.core.framework import graph_pb2 -from tensorflow.python.lib.io import file_io -from frontend.tf_export import Exporter - from os import path @@ -59,7 +55,4 @@ def read(self, pb_path: str) -> Model: return model def write(self, model: Model, path: str) -> None: - graph: tf.Graph = Exporter.export_graph(model) - graph_def = graph.as_graph_def(add_shapes=True) - - file_io.atomic_write_string_to_file(path, graph_def.SerializeToString()) + raise NotImplementedError diff --git a/dlk/python/dlk/frontend/tf_export.py b/dlk/python/dlk/frontend/tf_export.py deleted file mode 100644 index 732762fff..000000000 --- a/dlk/python/dlk/frontend/tf_export.py +++ /dev/null @@ -1,359 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2018 The Blueoil Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================= -"""Exporter from DLK to TensorFlow.""" - -import functools -from typing import Any, Dict, List, Optional - -import numpy as np -import tensorflow as tf -from tensorflow.python.framework.function import Defun - -from core import model as dlk -from core.data_types import DataType -from core.graph import Graph as dlk_Graph -from core.graph import GraphRunner -from core.operators import Add, AveragePool, BatchNormalization, Constant, Conv, Identity, Input, \ - MaxPool, Operator, Output, Transpose, QTZ_binary_mean_scaling, QTZ_linear_mid_tread_half, \ - Reshape, Softmax, Relu, Flatten, Dropout, Gemm - -TF_DTYPE_MAP: Dict[str, tf.DType] = { - 'Float16': tf.float16, - 'Float32': tf.float32, - 'Float64': tf.float64, - 'Uint8': tf.uint8, - 'Uint16': tf.uint16, - 'Uint32': None, - 'Uint64': None, - 'Int8': tf.int8, - 'Int16': tf.int16, - 'Int32': tf.int32, - 'Int64': tf.int64, - - 'Bool': tf.bool, - 'String': tf.string, -} - - -class Exporter(GraphRunner): - - @classmethod - def export_graph(cls, model: dlk.Model) -> tf.Graph: - dlk_graph = model.graph - - runner = cls(dlk_graph) - runner.run() - - return runner.tf_graph - - def __init__(self, graph: dlk_Graph) -> None: - self._tf_graph = tf.Graph() - self.tf_ops: Dict[str, tf.Tensor] = {} - self._formats: Dict[str, str] = {} - self._permutation: Dict[str, List[int]] = {} - super().__init__(graph) - - @property - def tf_graph(self) -> tf.Graph: - return self._tf_graph - - # initialize and finalize - - def initialize(self, **kwargs: Any) -> None: - """Set up TF's default graph""" - # self._tf_graph.as_default().__enter__() - - def finalize(self, **kwargs: Any) -> None: - """Release the TF default graph""" - # self._tf_graph.as_default().__exit__(None, None, None) - - # backward run: check the data format and transpose if needed - - def _transpose_weights(self, node: Operator) -> Optional[str]: - given_format = self._formats.get(node.name) - set_format = node.dimension - if given_format and len(given_format) == 4 and given_format != set_format: - perm = [set_format.index(s) for s in given_format] - self._permutation[node.name] = perm - - return given_format - - def run_backward_constant(self, node: Constant, **kwargs: Any) -> None: - self._transpose_weights(node) - - def _transpose_if_not_supported(self, node: Operator) -> None: - if node.dimension not in {'NHWC', 'NCHW'}: - perm = [node.dimension.index(s) for s in 'NHWC'] - self._permutation[node.name] = perm - - def run_backward_identity(self, node: Identity, **kwargs: Any) -> None: - given_format = self._transpose_weights(node) - if given_format: - self._formats[node.input_ops['input'].name] = given_format - - def run_backward_QTZ_binary_mean_scaling(self, node: QTZ_binary_mean_scaling, **kwargs: Any) -> None: - given_format = self._transpose_weights(node) - if given_format: - self._formats[node.input_ops['input'].name] = given_format - - def run_backward_transpose(self, node: Transpose, **kwargs: Any) -> None: - given_format = self._transpose_weights(node) - if given_format: - # change the input's format - 
perm = node.permutation - inv_perm = [perm[i] for i in range(len(perm))] # inverse the perm - transposed_form = functools.reduce( - lambda x, y: x + y, [given_format[i] for i in inv_perm]) - self._formats[node.input_ops['data'].name] = transposed_form - - def run_backward_conv(self, node: Conv, **kwargs: Any) -> None: - # if the format is not supported, change their order - self._transpose_if_not_supported(node) - self._formats[node.input_ops['W'].name] = 'HWCN' - - def run_backward_batch_normalization(self, node: BatchNormalization, **kwargs: Any) -> None: - given_format = self._transpose_weights(node) - if given_format: - self._formats[node.input_ops['X'].name] = given_format - - def run_backward_QTZ_linear_mid_tread_half(self, node: QTZ_linear_mid_tread_half, **kwargs: Any) -> None: - given_format = self._transpose_weights(node) - if given_format: - self._formats[node.input_ops['X'].name] = given_format - - def run_backward_max_pool(self, node: MaxPool, **kwargs: Any) -> None: - # if the format is not supported, change their order - self._transpose_if_not_supported(node) - - def run_backward_average_pool(self, node: AveragePool, **kwargs: Any) -> None: - # if the format is not supported, change their order - self._transpose_if_not_supported(node) - - # forward run: create tf operators - - def _get_tf_dtype(self, dlk_dtype: DataType) -> tf.DType: - dtype = TF_DTYPE_MAP.get(dlk_dtype.name()) - if dtype: - return dtype - else: - raise ValueError(f'dtype {dlk_dtype.name} is not supported.') - - def _get_transposed_or_not(self, node: Operator): - if node.name in self._permutation.keys(): - perm = self._permutation[node.name] - new_shape: List[int] = [node.shape[i] for i in perm] - new_dimension: str = functools.reduce( - lambda x, y: x + y, [node.dimension[i] for i in perm]) - new_data: np.ndarray = node.data.transpose(perm) - return new_shape, new_dimension, new_data - else: - return node.shape, node.dimension, node.data - - def run_forward_input(self, node: Input, **kwargs: Any) -> None: - new_shape, _, _ = self._get_transposed_or_not(node) - with self._tf_graph.as_default(): - x = tf.placeholder(self._get_tf_dtype(node.dtype), shape=new_shape, name=node.name) - self.tf_ops[node.name] = x - - def run_forward_constant(self, node: Constant, **kwargs: Any) -> None: - new_shape, _, new_data = self._get_transposed_or_not(node) - with self._tf_graph.as_default(): - x = tf.constant(new_data, dtype=self._get_tf_dtype(node.dtype), - shape=new_shape, - name=node.name) - self.tf_ops[node.name] = x - - def run_forward_output(self, node: Output, **kwargs: Any) -> None: - input = self.tf_ops[node.input_ops['input'].name] - with self._tf_graph.as_default(): - x = tf.identity(input, name=node.name) - self.tf_ops[node.name] = x - - def run_forward_identity(self, node: Identity, **kwargs: Any) -> None: - input = self.tf_ops[node.input_ops['input'].name] - with self._tf_graph.as_default(): - x = tf.identity(input, name=node.name) - self.tf_ops[node.name] = x - - def run_forward_QTZ_binary_mean_scaling(self, node: QTZ_binary_mean_scaling, **kwargs: Any) -> None: - - x = self.tf_ops[node.input_ops['input'].name] - - @Defun(self._get_tf_dtype(node.dtype), shape_func=lambda op: [op.inputs[0].get_shape()], - func_name='QTZ_binary_mean_scaling') - def _forward(x): - """Forward. - Args: - x(tf.Variable): The input to be quantized, weights normally. - Returns: - tf.Variable: The quantized input. 
- """ - expectation = tf.reduce_mean(tf.abs(x)) - return tf.sign(x) * expectation - - with self._tf_graph.as_default(): - output = _forward(x, name=node.name) - self.tf_ops[node.name] = output - - def run_forward_transpose(self, node: Transpose, **kwargs: Any) -> None: - perm = node.permutation - a = self.tf_ops[node.input_ops['data'].name] - with self._tf_graph.as_default(): - x = tf.transpose(a, perm, name=node.name) - self.tf_ops[node.name] = x - - def _get_padding2D(self, input_shape: List[int], kernel_shape: List[int]) -> str: - return 'SAME' if input_shape == kernel_shape else 'VALID' - - def run_forward_conv(self, node: Conv, **kwargs: Any) -> None: - if node.dilations != [1, 1, 1, 1]: - ValueError(f'Tensorflow v1.4 does not support dilations {node.dilations}') - - x = self.tf_ops[node.input_ops['X'].name] - w = self.tf_ops[node.input_ops['W'].name] - - inputs = [x, w] - dtypes = [self._get_tf_dtype(node.dtype)] - attrs: Dict[str, Any] = {} - - dim = node.dimension - strides = [1, *(node.strides), 1] if dim == 'NHWC' \ - else [1, 1, *(node.strides)] # dim == 'NCHW' - in_x = node.input_ops['X'] - padding = self._get_padding2D([in_x.height, in_x.width], [node.height, node.width]) - - with self._tf_graph.as_default(): - y = tf.nn.conv2d(x, w, strides, padding, name=node.name, - data_format=dim) - self.tf_ops[node.name] = y - - def run_forward_batch_normalization(self, node: BatchNormalization, **kwargs: Any) -> None: - x = self.tf_ops[node.input_ops['X'].name] - scale = self.tf_ops[node.input_ops['scale'].name] - b = self.tf_ops[node.input_ops['B'].name] - mean = self.tf_ops[node.input_ops['mean'].name] - var = self.tf_ops[node.input_ops['var'].name] - epsilon = node.epsilon - - # param_initializer = {'beta': b, 'gamma': scale, 'moving_mean': mean, 'moving_variance': var} - # test = tf.constant_initializer(10) - - with self._tf_graph.as_default(): - # b = tf.constant_initializer(b) - # scale = tf.constant_initializer(scale) - # mean = tf.constant_initializer(mean) - # var = tf.constant_initializer(var) - # y = tf.layers.batch_normalization(x, beta_initializer=b, gamma_initializer=scale, - # moving_mean_initializer=mean, - # moving_variance_initializer=var, - # epsilon=epsilon, fused=True) - y = tf.nn.fused_batch_norm(x, scale, b, mean=mean, variance=var, epsilon=epsilon, is_training=False, - name=node.name) - # y = tf.nn.batch_normalization(x, mean, var, b, scale, epsilon, name=node.name) - # y = tf.contrib.layers.batch_norm(x, center=True, scale=True, epsilon=epsilon, fused=True) - self.tf_ops[node.name] = y[0] - - def run_forward_QTZ_linear_mid_tread_half(self, node: QTZ_linear_mid_tread_half, **kwargs: Any) -> None: - x = self.tf_ops[node.input_ops['X'].name] - bit = self.tf_ops[node.input_ops['Y'].name] - max_value = self.tf_ops[node.input_ops['Z'].name] - - @Defun(self._get_tf_dtype(node.dtype), tf.int32, tf.float32, - shape_func=lambda op: [op.inputs[0].get_shape()], - func_name='QTZ_linear_mid_tread_half') - def _func(x, bit, max_value): - min_value = 0 - n = tf.pow(2., tf.cast(bit, dtype=tf.float32)) - 1 - value_range = max_value - min_value - - x = tf.clip_by_value(x, min_value, max_value, name="clip") - shifted = (x - min_value) / value_range - quantized = tf.round(shifted * n) / n - unshifted = quantized * value_range + min_value - return unshifted - - with self._tf_graph.as_default(): - output = _func(x, bit, max_value, name=node.name) - self.tf_ops[node.name] = output - - def run_forward_add(self, node: Add, **kwargs: Any) -> None: - x = 
self.tf_ops[node.input_ops['A'].name] - y = self.tf_ops[node.input_ops['B'].name] - - with self._tf_graph.as_default(): - c = tf.add(x, y, name=node.name) - self.tf_ops[node.name] = c - - def run_forward_max_pool(self, node: MaxPool, **kwargs: Any) -> None: - x = self.tf_ops[node.input_ops['X'].name] - ksize = [node.kernel_height, node.kernel_width] - strides = node.strides - in_x = node.input_ops['X'] - padding = self._get_padding2D([in_x.height, in_x.width], [node.height, node.width]) - - with self._tf_graph.as_default(): - y = tf.nn.max_pool(x, ksize, strides, padding, name=node.name) - self.tf_ops[node.name] = y - - def run_forward_average_pool(self, node: AveragePool, **kwargs: Any) -> None: - x = self.tf_ops[node.input_ops['X'].name] - ksize = [node.kernel_height, node.kernel_width] - strides = node.strides - in_x = node.input_ops['X'] - padding = self._get_padding2D([in_x.height, in_x.width], [node.height, node.width]) - - y = tf.nn.avg_pool(x, ksize, strides, padding, name=node.name) - self.tf_ops[node.name] = y - - def run_forward_reshape(self, node: Reshape, **kwargs: Any) -> None: - tensor = self.tf_ops[node.input_ops['data'].name] - shape = node.shape - - with self._tf_graph.as_default(): - reshaped = tf.reshape(tensor, shape, name=node.name) - self.tf_ops[node.name] = reshaped - - def run_forward_softmax(self, node: Softmax, **kwargs: Any) -> None: - logits = self.tf_ops[node.input_ops['input'].name] - - with self._tf_graph.as_default(): - output = tf.nn.softmax(logits, name=node.name) - self.tf_ops[node.name] = output - - def run_forward_relu(self, node: Relu, **kwargs: Any) -> None: - features = self.tf_ops[node.input_ops['X'].name] - - with self._tf_graph.as_default(): - y = tf.nn.relu(features, name=node.name) - self.tf_ops[node.name] = y - - def run_forward_flatten(self, node: Flatten, **kwargs: Any) -> None: - inputs = self.tf_ops[node.input_ops['input'].name] - - with self._tf_graph.as_default(): - output = tf.layers.flatten(inputs, name=node.name) - self.tf_ops[node.name] = output - - def run_forward_dropout(self, node: Dropout, **kwargs: Any) -> None: - x = self.tf_ops[node.input_ops['data'].name] - keep_prob = 1 - node.ratio - - with self._tf_graph.as_default(): - output = tf.nn.dropout(x, keep_prob, name=node.name) - self.tf_ops[node.name] = output - - def run_forward_gemm(self, node: Gemm, **kwargs: Any) -> None: - raise NotImplementedError(f'conversion of {node.op_type} is not supported yet.') From 71fabddea2d34b0c8f292b506fac5056affb9cc0 Mon Sep 17 00:00:00 2001 From: nlpng Date: Fri, 14 Dec 2018 08:41:44 +0900 Subject: [PATCH 20/45] Light fix two tests of optimizer & remove test for exporter --- dlk/python/dlk/core/operators.py | 4 +- dlk/python/dlk/core/optimizer.py | 5 +- dlk/tests/test_optimizer.py | 92 +++++++++++++++++++------------- dlk/tests/test_tf_io.py | 40 -------------- 4 files changed, 60 insertions(+), 81 deletions(-) diff --git a/dlk/python/dlk/core/operators.py b/dlk/python/dlk/core/operators.py index 9f059fb35..40e12e6bd 100644 --- a/dlk/python/dlk/core/operators.py +++ b/dlk/python/dlk/core/operators.py @@ -281,8 +281,10 @@ def add_outputs(self, outputs: OutOps) -> None: All the key names have to be in list `output_names`. 
""" + x = set(outputs.keys()) + y = set(self._output_names) assert set(outputs.keys()).issubset( - set(self._output_names)), "Illegal output names included" + set(self._output_names)), f"Illegal output names {y} included {x}" for n in outputs.keys(): lst = self._output_ops.get(n) if lst is not None: diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index ec259e2c9..6ea1e35b9 100644 --- a/dlk/python/dlk/core/optimizer.py +++ b/dlk/python/dlk/core/optimizer.py @@ -343,6 +343,7 @@ def pass_pack_weights(graph: Graph) -> None: for op in to_be_removed: graph.remove_op(op) + def pass_quantize_convolutions(graph: Graph) -> None: p = Pattern('Conv') matches = find_pattern(graph, p) @@ -359,12 +360,12 @@ def pass_quantize_convolutions(graph: Graph) -> None: # change the output data type of the convolution if thresholds are available if conv_node.has_thresholds: - conv_node.dtype = QUANTIZED_NOT_PACKED + conv_node.dtype = QUANTIZED_NOT_PACKED() # change the output data type of the quantizers conv_node.quantizer.dtype = Uint32 for qtz in conv_node.a_quantizer: - qtz.dtype = QUANTIZED_NOT_PACKED + qtz.dtype = QUANTIZED_NOT_PACKED() def pass_propagate_datatypes(graph) -> None: diff --git a/dlk/tests/test_optimizer.py b/dlk/tests/test_optimizer.py index da4f55e98..0b1aa7d32 100644 --- a/dlk/tests/test_optimizer.py +++ b/dlk/tests/test_optimizer.py @@ -15,13 +15,14 @@ # ============================================================================= """Test file for GraphRunner.""" import unittest +import numpy as np from core.data_types import Float32, Uint32, Int32, QUANTIZED_NOT_PACKED -from core.graph import Graph, GraphRunner -from core.optimizer import Optimizer +from core.graph import Graph +from core.optimizer import pass_remove_identities, pass_transpose, pass_precompute, \ + pass_propagate_quantization_details_into_conv, pass_compute_thresholds, pass_pack_weights, \ + pass_quantize_convolutions, pass_propagate_datatypes, pass_propagate_output_type_backward, pass_dot_graph from core.operators import Add, AveragePool, BatchNormalization, Constant, Conv, Identity, Input, \ MaxPool, Operator, Output, Transpose, QTZ_binary_mean_scaling, QTZ_linear_mid_tread_half, Reshape, Softmax - -import numpy as np from typing import Any, Dict, List, Tuple @@ -36,41 +37,48 @@ def test_precompute1(self) -> None: graph1 = self.create_sample_graph(data1, data2, data3) graph2 = self.create_precompute_graph(data1, data2, data3) - optim = Optimizer() - optim.precompute(graph1) - - # for debug - # from frontend import TensorFlowIO - # from core.model import Model - # import os - # io = TensorFlowIO() - # tmp_dir = os.path.join('tmp') - # if not os.path.exists(tmp_dir): - # os.mkdir(tmp_dir) - # path = os.path.join('tmp', 'test_precompute.pb') - # model = Model() - # model.graph = graph1 - # io.write(model, path) + pass_remove_identities(graph1) + pass_transpose(graph1) - self.assertEqual(graph1, graph2, 'precompute failed.') + pass_propagate_quantization_details_into_conv(graph1) + pass_pack_weights(graph1) + pass_quantize_convolutions(graph1) + pass_propagate_datatypes(graph1) - print("Precompute test #1 passed!") - - def test_precompute2(self) -> None: - """Test code for precompute optimizer.""" - data1 = np.random.rand(3, 2, 2, 3) - data2 = np.random.rand(3, 2, 2, 3) - data3 = np.random.rand(3, 2, 2, 3) - graph1 = self.create_sample_graph(data1, data2, data3) - graph2, scaling1, scaling2 = self.create_quantized_graph(data1, data2, data3) - - optim = Optimizer() - optim.precompute(graph1, 
hard_quantized=True) + processed_nodes = [] + while pass_precompute(graph1, processed_nodes=processed_nodes): + pass self.assertEqual(graph1, graph2, 'precompute failed.') - self.assertAlmostEqual(graph1.get_op('conv2').quantizer.scaling_factor, scaling2) # type: ignore - print("Precompute test #2 passed!") + print("Precompute test #1 passed!") + + # def test_precompute2(self) -> None: + # """Test code for precompute optimizer.""" + # data1 = np.random.rand(3, 2, 2, 3) + # data2 = np.random.rand(3, 2, 2, 3) + # data3 = np.random.rand(3, 2, 2, 3) + # graph1 = self.create_sample_graph(data1, data2, data3) + # graph2, scaling1, scaling2 = self.create_quantized_graph(data1, data2, data3) + # + # # optim = Optimizer() + # # optim.precompute(graph1, hard_quantized=True) + # pass_remove_identities(graph1) + # pass_transpose(graph1) + # + # pass_propagate_quantization_details_into_conv(graph1) + # pass_pack_weights(graph1) + # pass_quantize_convolutions(graph1) + # pass_propagate_datatypes(graph1) + # + # processed_nodes = [] + # while pass_precompute(graph1, processed_nodes=processed_nodes): + # pass + # + # self.assertEqual(graph1, graph2, 'precompute failed.') + # self.assertAlmostEqual(graph1.get_op('conv2').quantizer.scaling_factor, scaling2) # type: ignore + # + # print("Precompute test #2 passed!") def test_precompute3(self) -> None: """Test code for precompute optimizer.""" @@ -80,8 +88,17 @@ def test_precompute3(self) -> None: graph1 = self.create_sample_graph3(data1, data2, data3) graph2, scaling2, scaling3 = self.create_quantized_graph2(data1, data2, data3) - optim = Optimizer() - optim.precompute(graph1, hard_quantized=True) + pass_remove_identities(graph1) + pass_transpose(graph1) + + pass_propagate_quantization_details_into_conv(graph1) + pass_pack_weights(graph1) + pass_quantize_convolutions(graph1) + pass_propagate_datatypes(graph1) + + processed_nodes = [] + while pass_precompute(graph1, processed_nodes=processed_nodes): + pass self.assertEqual(graph1, graph2, 'precompute failed.') self.assertAlmostEqual(graph1.get_op('conv2').quantizer.scaling_factor, scaling2) # type: ignore @@ -95,8 +112,7 @@ def test_transpose_NHWC(self) -> None: graph1 = self.create_sample_graph2(data) graph2 = self.create_transposed_graph(data) - optim = Optimizer() - optim.transpose_NHWC(graph1) + pass_transpose(graph1) self.assertEqual(graph1, graph2, 'transpose to NHWC failed.') diff --git a/dlk/tests/test_tf_io.py b/dlk/tests/test_tf_io.py index f8fa4f99d..c94f901dc 100644 --- a/dlk/tests/test_tf_io.py +++ b/dlk/tests/test_tf_io.py @@ -210,46 +210,6 @@ def match(op1: Operator, op2: Operator) -> bool: return False return True - def test_tf_export(self) -> None: - """Test code for exporting Tensorflow file with TensorflowIO.""" - tmpdir = 'tmp' - tf_path = path.join(tmpdir, - 'test.pb') - if not path.exists(tmpdir): - makedirs(tmpdir) - elif not path.isdir(tmpdir): - raise ValueError('tmp directory is not a directory.') - - model: Model = self.make_simple_model() - - tf_io = TensorFlowIO() - tf_io.write(model, tf_path) - new_model = tf_io.read(tf_path) - - self.assertTrue(self._comparator(model.graph, new_model.graph)) - - print("TF file export test #1 passed!") - - def test_tf_export2(self) -> None: - """Test code for exporting Tensorflow file with TensorflowIO #2.""" - tmpdir = 'tmp' - tf_path = path.join(tmpdir, - 'test2.pb') - if not path.exists(tmpdir): - makedirs(tmpdir) - elif not path.isdir(tmpdir): - raise ValueError('tmp directory is not a directory.') - - model: Model = make_model() - - tf_io = 
TensorFlowIO() - tf_io.write(model, tf_path) - new_model = tf_io.read(tf_path) - - self.assertEqual(model.graph, new_model.graph) - - print("TF file export test #2 passed!") - if __name__ == '__main__': unittest.main() From 5a0014f8a6a9482fb5e404f63a46b38343d57454 Mon Sep 17 00:00:00 2001 From: nlpng Date: Fri, 14 Dec 2018 10:11:01 +0900 Subject: [PATCH 21/45] Fix tests for optimizer --- dlk/python/dlk/core/operators.py | 5 +-- dlk/python/dlk/core/optimizer.py | 2 +- dlk/tests/test_optimizer.py | 58 +++++++++++++++----------------- 3 files changed, 29 insertions(+), 36 deletions(-) diff --git a/dlk/python/dlk/core/operators.py b/dlk/python/dlk/core/operators.py index 40e12e6bd..223066470 100644 --- a/dlk/python/dlk/core/operators.py +++ b/dlk/python/dlk/core/operators.py @@ -281,10 +281,7 @@ def add_outputs(self, outputs: OutOps) -> None: All the key names have to be in list `output_names`. """ - x = set(outputs.keys()) - y = set(self._output_names) - assert set(outputs.keys()).issubset( - set(self._output_names)), f"Illegal output names {y} included {x}" + assert set(outputs.keys()).issubset(set(self._output_names)), f"Illegal output names included" for n in outputs.keys(): lst = self._output_ops.get(n) if lst is not None: diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index 6ea1e35b9..77e08312c 100644 --- a/dlk/python/dlk/core/optimizer.py +++ b/dlk/python/dlk/core/optimizer.py @@ -137,7 +137,7 @@ def pass_precompute(graph: Graph, processed_nodes) -> bool: # get nodes to be removed after being disconnected get_nodes_in_branch(m.node, None, to_be_removed) - new_constant.add_outputs(m.node.output_ops) + new_constant.add_outputs({'output': m.node.output_ops.values()}) for output_name, consumer_list in m.node.output_ops.items(): for consumer_node in consumer_list: for input_name, input_node in consumer_node.input_ops.items(): diff --git a/dlk/tests/test_optimizer.py b/dlk/tests/test_optimizer.py index 0b1aa7d32..0e58d65b5 100644 --- a/dlk/tests/test_optimizer.py +++ b/dlk/tests/test_optimizer.py @@ -15,15 +15,16 @@ # ============================================================================= """Test file for GraphRunner.""" import unittest -import numpy as np from core.data_types import Float32, Uint32, Int32, QUANTIZED_NOT_PACKED -from core.graph import Graph from core.optimizer import pass_remove_identities, pass_transpose, pass_precompute, \ pass_propagate_quantization_details_into_conv, pass_compute_thresholds, pass_pack_weights, \ pass_quantize_convolutions, pass_propagate_datatypes, pass_propagate_output_type_backward, pass_dot_graph +from core.graph import Graph from core.operators import Add, AveragePool, BatchNormalization, Constant, Conv, Identity, Input, \ MaxPool, Operator, Output, Transpose, QTZ_binary_mean_scaling, QTZ_linear_mid_tread_half, Reshape, Softmax -from typing import Any, Dict, List, Tuple + +import numpy as np +from typing import Tuple class TestOptimizer(unittest.TestCase): @@ -40,9 +41,29 @@ def test_precompute1(self) -> None: pass_remove_identities(graph1) pass_transpose(graph1) + processed_nodes = [] + while pass_precompute(graph1, processed_nodes=processed_nodes): + pass + + self.assertEqual(graph1, graph2, 'precompute failed.') + + print("Precompute test #1 passed!") + + def test_precompute2(self) -> None: + """Test code for precompute optimizer.""" + data1 = np.random.rand(3, 2, 2, 3) + data2 = np.random.rand(3, 2, 2, 3) + data3 = np.random.rand(3, 2, 2, 3) + graph1 = self.create_sample_graph(data1, data2, data3) + graph2, 
scaling1, scaling2 = self.create_quantized_graph(data1, data2, data3) + + pass_remove_identities(graph1) + pass_transpose(graph1) + pass_propagate_quantization_details_into_conv(graph1) pass_pack_weights(graph1) pass_quantize_convolutions(graph1) + pass_propagate_datatypes(graph1) processed_nodes = [] @@ -50,35 +71,9 @@ def test_precompute1(self) -> None: pass self.assertEqual(graph1, graph2, 'precompute failed.') + self.assertAlmostEqual(graph1.get_op('conv2').quantizer.scaling_factor, scaling2) # type: ignore - print("Precompute test #1 passed!") - - # def test_precompute2(self) -> None: - # """Test code for precompute optimizer.""" - # data1 = np.random.rand(3, 2, 2, 3) - # data2 = np.random.rand(3, 2, 2, 3) - # data3 = np.random.rand(3, 2, 2, 3) - # graph1 = self.create_sample_graph(data1, data2, data3) - # graph2, scaling1, scaling2 = self.create_quantized_graph(data1, data2, data3) - # - # # optim = Optimizer() - # # optim.precompute(graph1, hard_quantized=True) - # pass_remove_identities(graph1) - # pass_transpose(graph1) - # - # pass_propagate_quantization_details_into_conv(graph1) - # pass_pack_weights(graph1) - # pass_quantize_convolutions(graph1) - # pass_propagate_datatypes(graph1) - # - # processed_nodes = [] - # while pass_precompute(graph1, processed_nodes=processed_nodes): - # pass - # - # self.assertEqual(graph1, graph2, 'precompute failed.') - # self.assertAlmostEqual(graph1.get_op('conv2').quantizer.scaling_factor, scaling2) # type: ignore - # - # print("Precompute test #2 passed!") + print("Precompute test #2 passed!") def test_precompute3(self) -> None: """Test code for precompute optimizer.""" @@ -94,6 +89,7 @@ def test_precompute3(self) -> None: pass_propagate_quantization_details_into_conv(graph1) pass_pack_weights(graph1) pass_quantize_convolutions(graph1) + pass_propagate_datatypes(graph1) processed_nodes = [] From fc27ab14bcd7027805a4b3537ee450fdafc760cb Mon Sep 17 00:00:00 2001 From: nlpng Date: Fri, 14 Dec 2018 10:35:19 +0900 Subject: [PATCH 22/45] Remove test_graphrunner since these is no graphrunner to test --- dlk/tests/test_graphrunner.py | 232 ---------------------------------- dlk/tests/test_optimizer.py | 2 +- 2 files changed, 1 insertion(+), 233 deletions(-) delete mode 100644 dlk/tests/test_graphrunner.py diff --git a/dlk/tests/test_graphrunner.py b/dlk/tests/test_graphrunner.py deleted file mode 100644 index 4eeb91f44..000000000 --- a/dlk/tests/test_graphrunner.py +++ /dev/null @@ -1,232 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2018 The Blueoil Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================= -"""Test file for GraphRunner.""" -import unittest -from core.data_types import Float32 -from core.graph import Graph, GraphRunner -from core.operators import Conv, Input, Output, Constant, Operator -import numpy as np -from typing import Any, Dict, List - - -class TestRunner(GraphRunner): - """Test class of GraphRunner. 
- - This just list up all the op_type of the graph. - """ - - def __init__(self, graph: Graph, depth_first: bool = True, lazy: bool = True) -> None: - self.message: List[str] = [] - super().__init__(graph, depth_first=depth_first, lazy=lazy) - - def initialize(self, **kwargs: Any) -> None: - self.message.append('start running.') - - def finalize(self, **kwargs: Any) -> None: - self.message.append('finished running.') - - # backward: ouput -> inputs - def run_backward_by_default(self, node: Operator, **kwargs: Any) -> None: - kwargs['backward'].append(node.name) - - def run_backward_conv(self, node: Conv, **kwargs: Any) -> None: - self.message.append(f'{node.name}: backward process') - super().run_backward_conv(node, **kwargs) - - # forward: inputs -> output - def run_forward_by_default(self, node: Operator, **kwargs: Any) -> None: - kwargs['forward'].append(node.name) - - def run_forward_conv(self, node: Conv, **kwargs: Any) -> None: - self.message.append(f'{node.name}: forward process') - super().run_forward_conv(node, **kwargs) - - -class TestGraphRunner(unittest.TestCase): - """Test class for GraphRunner.""" - - def test_graphrunner_default(self) -> None: - """Test code for GraphRunner, with the depth-first mode (default).""" - graph = Graph() - self.create_graph(graph) - - kwargs: Dict[str, List[str]] = {'backward': [], 'forward': []} - runner = TestRunner(graph) - runner.run(**kwargs) - - lst1 = ['output', 'conv4', 'input3', 'conv3', 'input2', 'conv2', 'conv1', 'input1', 'weight1', 'weight2'] - self.assertEqual(kwargs['backward'], lst1, - 'backward traversal failed in depth-first mode.') - - lst2 = ['input3', 'input2', 'input1', 'weight1', 'conv1', 'weight2', 'conv2', 'conv3', 'conv4', 'output'] - self.assertEqual(kwargs['forward'], lst2, 'forward traversal failed in depth-first mode.') - - self.assertEqual(runner.message, [ - 'start running.', - 'conv4: backward process', - 'conv3: backward process', - 'conv2: backward process', - 'conv1: backward process', - 'conv1: forward process', - 'conv2: forward process', - 'conv3: forward process', - 'conv4: forward process', - 'finished running.', - ]) - - print("GraphRunner depth-first mode test passed!") - - def test_graphrunner_breadth_first(self) -> None: - """Test code for GraphRunner, with the breadth-first mode.""" - graph = Graph() - self.create_graph(graph) - - kwargs: Dict[str, List[str]] = {'backward': [], 'forward': []} - runner = TestRunner(graph, depth_first=False, lazy=False) - runner.run(**kwargs) - - lst1 = ['output', 'conv4', 'input3', 'conv3', 'input2', 'conv2', 'conv1', 'weight2', 'input1', 'weight1'] - self.assertEqual(kwargs['backward'], lst1, - 'backward traversal failed in breadth-first mode.') - - lst2 = ['input3', 'input2', 'input1', 'weight1', 'weight2', - 'conv4', 'conv3', 'conv1', 'conv2', 'output'] - self.assertEqual(kwargs['forward'], lst2, 'forward traversal failed in breadth-first mode.') - - self.assertEqual(runner.message, [ - 'start running.', - 'conv4: backward process', - 'conv3: backward process', - 'conv2: backward process', - 'conv1: backward process', - 'conv4: forward process', - 'conv3: forward process', - 'conv1: forward process', - 'conv2: forward process', - 'finished running.', - ]) - - print("GraphRunner bradth-first mode test passed!") - - def test_graphrunner_lazy_breadth_first(self) -> None: - """Test code for GraphRunner, with the lazy breadth-first mode.""" - graph = Graph() - self.create_graph(graph) - - kwargs: Dict[str, List[str]] = {'backward': [], 'forward': []} - runner = 
TestRunner(graph, depth_first=False, lazy=True) - runner.run(**kwargs) - - lst1 = ['output', 'conv4', 'input3', 'conv3', 'input2', 'conv2', 'conv1', 'weight2', 'input1', 'weight1'] - self.assertEqual(kwargs['backward'], lst1, - 'backward traversal failed in breadth-first mode.') - - lst2 = ['input3', 'input2', 'input1', 'weight1', 'weight2', - 'conv1', 'conv2', 'conv3', 'conv4', 'output'] - self.assertEqual(kwargs['forward'], lst2, 'forward traversal failed in breadth-first mode.') - - self.assertEqual(runner.message, [ - 'start running.', - 'conv4: backward process', - 'conv3: backward process', - 'conv2: backward process', - 'conv1: backward process', - 'conv1: forward process', - 'conv2: forward process', - 'conv3: forward process', - 'conv4: forward process', - 'finished running.', - ]) - - print("GraphRunner lazy breadth-first mode test passed!") - - def create_graph(self, graph): - - x1 = Input( - 'input1', - [1, 4, 4, 3], - Float32(), - ) - - w1 = Constant( - 'weight1', - Float32(), - np.zeros([1, 2, 2, 3]) - ) - - conv1 = Conv( - 'conv1', - [1, 3, 3, 3], - Float32(), - {'X': x1, 'W': w1}, - kernel_shape=[2, 2] - ) - - w2 = Constant( - 'weight2', - Float32(), - np.zeros([3, 2, 2, 3]) - ) - - conv2 = Conv( - 'conv2', - [1, 2, 2, 3], - Float32(), - {'X': conv1, 'W': w2}, - kernel_shape=[2, 2] - ) - - x2 = Input( - 'input2', - [3, 3, 3, 3], - Float32(), - ) - - x3 = Input( - 'input3', - [3, 3, 3, 3], - Float32(), - ) - - conv3 = Conv( - 'conv3', - [3, 2, 2, 3], - Float32(), - {'X': x2, 'W': conv2}, - kernel_shape=[2, 2] - ) - - conv4 = Conv( - 'conv4', - [1, 2, 2, 3], - Float32(), - {'X': x3, 'W': conv3}, - kernel_shape=[2, 2] - ) - - y = Output( - 'output', - [1, 2, 2, 3], - Float32(), - {'input': conv4} - ) - - # add ops to the graph - graph.add_op_and_inputs(y) - - -if __name__ == '__main__': - unittest.main() diff --git a/dlk/tests/test_optimizer.py b/dlk/tests/test_optimizer.py index 0e58d65b5..9e0d60db0 100644 --- a/dlk/tests/test_optimizer.py +++ b/dlk/tests/test_optimizer.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================= -"""Test file for GraphRunner.""" +"""Test file for Optimizer.""" import unittest from core.data_types import Float32, Uint32, Int32, QUANTIZED_NOT_PACKED from core.optimizer import pass_remove_identities, pass_transpose, pass_precompute, \ From 90a49658f21c4938641d5197259f7368b5774e1a Mon Sep 17 00:00:00 2001 From: Antonio Date: Tue, 25 Dec 2018 12:35:23 +0900 Subject: [PATCH 23/45] Added docstrings and deleted some unused code --- dlk/python/dlk/core/graph.py | 10 +- dlk/python/dlk/core/graph_pattern_matching.py | 73 ++++++++++- dlk/python/dlk/core/operators.py | 3 - dlk/python/dlk/core/optimizer.py | 118 +++++++++++++++++- dlk/python/dlk/scripts/generate_project.py | 2 +- 5 files changed, 187 insertions(+), 19 deletions(-) diff --git a/dlk/python/dlk/core/graph.py b/dlk/python/dlk/core/graph.py index 6c569b041..e6c117b1d 100644 --- a/dlk/python/dlk/core/graph.py +++ b/dlk/python/dlk/core/graph.py @@ -16,14 +16,8 @@ """Graph module.""" from collections import OrderedDict, defaultdict from typing import cast, Any, Dict, List, Optional, Set, TYPE_CHECKING -import functools - -from core.operators import Add, AveragePool, BatchNormalization, Constant, Conv, Identity, Input, \ - MaxPool, Operator, Output, Transpose, QTZ_binary_mean_scaling, QTZ_linear_mid_tread_half, Reshape, Softmax, \ - Relu, Flatten, Dropout, Gemm, SpaceToDepth, Mul, QTZ_binary_channel_wise_mean_scaling, ConcatOnDepth, Maximum, \ - DepthToSpace, Split - -from core.graph_pattern_matching import sort_graph, find_pattern, Pattern +from core.operators import Conv, Operator +from core.graph_pattern_matching import sort_graph class Graph(object): diff --git a/dlk/python/dlk/core/graph_pattern_matching.py b/dlk/python/dlk/core/graph_pattern_matching.py index 95862d260..306cb6d6b 100644 --- a/dlk/python/dlk/core/graph_pattern_matching.py +++ b/dlk/python/dlk/core/graph_pattern_matching.py @@ -17,23 +17,58 @@ class Pattern: + """Pattern is a sub-graph based on the operator types. + It is a recursive pattern where a Pattern holds a operator type and a list of inputs. + Each input in this list is also a Pattern. + """ def __init__(self, op=str(), inputs=list()): self.op = op self.inputs = inputs class NodeMatch: + """NodeMatch defines a sub-graph that match a given Pattern. + It is a recursive pattern where a NodeMatch holds a reference to the matched node and a list of inputs. + Each input in this list is also a NodeMatch. + """ def __init__(self): self.node = None self.inputs = list() def find_pattern(graph, pattern): + """Helper function that find a pattern in a graph. + + Parameters + ---------- + graph : Graph + The input graph where we will try to find the given pattern. + + pattern : Pattern + The pattern we want to look for. + + Returns + ------- + result : [NodeMatch] + A list of matches. Each element of the list is a NodeMatch. + """ gm = GraphMatcher(graph) return gm.get_op_type_matches(pattern) def sort_graph(graph): + """Helper function to topologically sort a given graph. + + Parameters + ---------- + graph : Graph + The input graph to be sorted. It is not modified. + + Returns + ------- + result : [Operator] + A list of Operator. Each element of the list is a reference to a Operator object. + """ exec_list = list() input_nodes = list() for node in graph.operators: @@ -55,6 +90,19 @@ def sort_graph(graph): def top_order(output_node, exec_list, visited): + """It topologically sorts a given graph. 
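
A minimal usage sketch for the Pattern/find_pattern helpers documented above, assuming `graph` is an already-built core.graph.Graph; the operator type names come from core.operators, and this pattern only matches convolutions with exactly two inputs (activations, then weights):

    from core.graph_pattern_matching import Pattern, find_pattern

    # a Conv whose weight input is produced by a binary weight quantizer;
    # '*' leaves the activation input unconstrained, '|' expresses alternation
    p = Pattern('Conv',
                [Pattern('*'),
                 Pattern('QTZ_binary_mean_scaling|QTZ_binary_channel_wise_mean_scaling')])
    binary_weight_convs = [m.node for m in find_pattern(graph, p)]
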
+ + Parameters + ---------- + output_node : Operator + The starting node. First one in the ordered list. + + exec_list : [Operator] + The ordered list. Note that this is an output parameter. + + visited : [str] + List of already visited nodes. + """ if visited[output_node.name]: return for input_node in output_node.input_nodes: @@ -65,6 +113,23 @@ def top_order(output_node, exec_list, visited): def get_nodes_in_branch(starting_node, stop_node, node_list): + """Helper function that gives us all nodes in a branch defined by a given node. + The starting node will be the output node of the branch. + + Note that there is an optional stop node. stop_node is allowed to be None. + + Parameters + ---------- + starting_node : Operator + The starting node. This node is the output node of the defined branch. + + stop_node : Operator + The last node in the path. If stop_node is None then this function will give us every node above + starting_node. + + node_list : [Operator] + The list of nodes contained in the branch. Note that this is an output parameter. + """ if starting_node == stop_node: return node_list.append(starting_node) @@ -73,13 +138,9 @@ def get_nodes_in_branch(starting_node, stop_node, node_list): get_nodes_in_branch(node, stop_node, node_list) -def match_to_execution_list(match, execution_list): - for input_node in match.inputs: - match_to_execution_list(input_node, execution_list) - execution_list.append(match.node) - - class GraphMatcher: + """GraphMatcher is used to find sub-graphs in the computational graph. + """ def __init__(self, input_graph): self.graph_node_list = list() self.graph_node_list = sort_graph(input_graph) diff --git a/dlk/python/dlk/core/operators.py b/dlk/python/dlk/core/operators.py index 223066470..d662d64fd 100644 --- a/dlk/python/dlk/core/operators.py +++ b/dlk/python/dlk/core/operators.py @@ -17,7 +17,6 @@ import functools import copy from itertools import dropwhile -from collections import OrderedDict from typing import cast, Any, Dict, Optional, TYPE_CHECKING from core.view import View from utils import classproperty @@ -55,8 +54,6 @@ def __init__(self, self._check_consistency() self._rank = len(shape) self._available_buffer = '' - self._visited = False - self._prop_details = Dict def __update_shape(self, shape: List[int], dimension_format: str) -> None: self._shape: List[int] = shape diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index 77e08312c..ca8d9a2b5 100644 --- a/dlk/python/dlk/core/optimizer.py +++ b/dlk/python/dlk/core/optimizer.py @@ -27,7 +27,17 @@ def pass_dot_graph(graph: Graph, filename) -> None: + """Generate a GraphViz dot file from a given Graph. + Parameters + ---------- + graph : Graph + The input graph to be converted into a dot script + + filename : str + The file where we want to save the dot script + + """ dot_script = 'digraph {' code = {} @@ -55,6 +65,14 @@ def pass_dot_graph(graph: Graph, filename) -> None: def pass_remove_identities(graph: Graph) -> None: + """Removes those nodes of a Graph that satisfies the condition node.op_type() == Identity. + + Parameters + ---------- + graph : Graph + The input graph. It will be modified in-place. + + """ p = Pattern("Identity") matches = find_pattern(graph, p) to_be_removed = list() @@ -83,6 +101,18 @@ def pass_remove_identities(graph: Graph) -> None: def pass_transpose(graph: Graph) -> None: + """Changes the data order of every node to be NHWC. + The fastest changing dimension is C + N stands for batch size (on inference we assume is 1. 
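
As a small concrete check of the permutation this pass computes (after 'I'/'O' have been rewritten to 'C'/'N'), an NCHW-formatted node is reordered with:

    dim = 'NCHW'
    permutation = [dim.index(s) for s in 'NHWC']   # -> [0, 2, 3, 1]
    # node.transpose(permutation) then moves channels to the last axis
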
+ H and W are the height and width respectively. + C stands for depth (aka channels) + + Parameters + ---------- + graph : Graph + The input graph. It will be modified in-place. + + """ p = Pattern("*") matches = find_pattern(graph, p) @@ -100,6 +130,21 @@ def pass_transpose(graph: Graph) -> None: def pass_precompute(graph: Graph, processed_nodes) -> bool: + """Given a node N, if the value of each input of N is known at compilation time then N will be executed. + The node N and its inputs will be replaced with a Constant node which holds the computed output of N. + + Parameters + ---------- + graph : Graph + The input graph. It will be modified in-place. + processed_nodes : list + The list of the processed nodes so far. + + Returns + ------- + result : bool + True if some nodes were precomputed, False otherwise. + """ p = Pattern('*') matches = find_pattern(graph, p) processed_before_precompute = len(processed_nodes) @@ -152,6 +197,25 @@ def pass_precompute(graph: Graph, processed_nodes) -> bool: def pass_propagate_quantization_details_into_conv(graph: Graph) -> None: + """Given a node N, it will propagate information about quantization into the convolution nodes. + + There are two types of nodes. Those which preserve quantization (for example, Space2Depth because + does not affect the actual values of the input data, only changes it positions) and those which + destroy quantization (for example, BatchNormalization, because it involves float operations). + + If there is path in the Graph which connect a Quantizer node Q to a Conv node C and every node between + Q and C preserve quantization (for example, Q -> Space2Depth -> Concat > Conv) then the details about the + quantizer Q should be propagated into the convolution node C. + + This pass allows us to further process the convolution nodes later and maybe quantize these convolutions + based on these quantization details. Note that a convolution node has two inputs, input data and weights. + We propagate quantization details through both the input node branch and the weight node branch. + + Parameters + ---------- + graph : Graph + The input graph. It will be modified in-place. + """ p = Pattern('*') matches = find_pattern(graph, p) qtypes = [ @@ -188,11 +252,25 @@ def pass_propagate_quantization_details_into_conv(graph: Graph) -> None: def pass_compute_thresholds(graph: Graph) -> None: + """Given a Quantizer node Q: + - if there is a backward path between Q and a convolution node and, + - every node N of that path satisfies the condition N.is_monotonic and, + - the convolution node C (the end of this path) is a quantized convolution + then this pass construct an LUT per channel which maps a possible output value of the quantized convolution node + C to the corresponding output of the quantization node Q. This effectively compress the path C -> ... -> Q + into a list of LUTs that can be used during inference. + + Parameters + ---------- + graph : Graph + The input graph. It will be modified in-place. + """ p = Pattern('QTZ_linear_mid_tread_half') matches = find_pattern(graph, p) to_be_removed = [] for m in matches: + # find a a backward path between the quantizer and the convolution ie. 
a path represented by a list [Q, ..., C] p = [m.node] while p[-1].op_type != 'Conv': non_variable_input = [inode for inode in p[-1].input_nodes @@ -240,7 +318,7 @@ def pass_compute_thresholds(graph: Graph) -> None: for th_id, th_v in enumerate(th_val): init_threshold = np.full(ch, th_v, dtype=np.float64) - # run calculation in reverse order: q -> bn -> scaling + # run calculation in reverse order, for example, q -> bn -> scaling trans_th = {'data': init_threshold} for op in p[:-1]: trans_th = op.de_run(**trans_th) @@ -288,6 +366,15 @@ def pass_compute_thresholds(graph: Graph) -> None: def pass_pack_weights(graph: Graph) -> None: + """Given a Quantized convolution node C, it will pack the weights of C into 32 bit words. + If the node Q that apply quantization to the weights of C quantizes, for example, into 1 bit values + then one 32 bit word will contain 32 weights. + + Parameters + ---------- + graph : Graph + The input graph. It will be modified in-place. + """ p = Pattern('Conv') matches = find_pattern(graph, p) quantization_types = [ @@ -345,6 +432,15 @@ def pass_pack_weights(graph: Graph) -> None: def pass_quantize_convolutions(graph: Graph) -> None: + """Given a convolution node C, if C has proper quantization details, it will mark C as quantized and it will + assign the correct output data types to the node C and its quantizers. Note that the expected output data type + on the runtime is defined as QUANTIZED_NOT_PACKED. + + Parameters + ---------- + graph : Graph + The input graph. It will be modified in-place. + """ p = Pattern('Conv') matches = find_pattern(graph, p) @@ -369,6 +465,13 @@ def pass_quantize_convolutions(graph: Graph) -> None: def pass_propagate_datatypes(graph) -> None: + """Further propagate output data types. + + Parameters + ---------- + graph : Graph + The input graph. It will be modified in-place. + """ p = Pattern('*') matches = find_pattern(graph, p) @@ -378,6 +481,19 @@ def pass_propagate_datatypes(graph) -> None: def pass_propagate_output_type_backward(graph: Graph) -> None: + """It is assumed that the output data type of a Graph is float. + We should propagate this assumption backwards from the output node of the graph to the + latest quantized convolution available. + + There could be cases where the latest convolution node Q is a quantized convolution and we also apply + thresholds to its outputs. In this cases, the quantized convolution output data type should be float + even if thresholds are applied. + + Parameters + ---------- + graph : Graph + The input graph. It will be modified in-place. + """ p = Pattern('*') matches = find_pattern(graph, p) diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index 8bd0a2d63..ef3d21573 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -39,7 +39,7 @@ def optimize_graph_step(model: Model, config: Config) -> None: - """Optimze graph in the model. + """Optimize graph in the model. 
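
To make the threshold tables built by pass_compute_thresholds above concrete, a toy illustration with made-up numbers (the per-channel sign handling and the clamping of oversized thresholds are left out): a 2-bit activation quantizer needs 2**2 - 1 = 3 thresholds per channel, and the quantized output is how many of them the raw convolution accumulator crosses.

    import numpy as np

    thresholds = np.array([10, 25, 40])        # hypothetical thresholds, one channel
    accumulators = np.array([3, 12, 26, 55])   # hypothetical convolution outputs
    quantized = (accumulators[:, None] >= thresholds[None, :]).sum(axis=1)
    print(quantized)                           # [0 1 2 3]
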
Parameters ---------- From ecf57f6a1cba02dca945e28caaef47222a6ef0a7 Mon Sep 17 00:00:00 2001 From: Antonio Date: Tue, 25 Dec 2018 13:50:31 +0900 Subject: [PATCH 24/45] Fix PEP8 --- dlk/python/dlk/core/graph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dlk/python/dlk/core/graph.py b/dlk/python/dlk/core/graph.py index e6c117b1d..c09bcdf0d 100644 --- a/dlk/python/dlk/core/graph.py +++ b/dlk/python/dlk/core/graph.py @@ -16,7 +16,7 @@ """Graph module.""" from collections import OrderedDict, defaultdict from typing import cast, Any, Dict, List, Optional, Set, TYPE_CHECKING -from core.operators import Conv, Operator +from core.operators import Conv, Operator from core.graph_pattern_matching import sort_graph From b284dabaaf8bad4978b963b4b4c644a4657b1a0a Mon Sep 17 00:00:00 2001 From: nlpng Date: Tue, 8 Jan 2019 11:24:20 +0900 Subject: [PATCH 25/45] Fix comment typo --- dlk/python/dlk/core/optimizer.py | 2 +- dlk/python/dlk/templates/Makefile.tpl | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index ca8d9a2b5..6d59e6554 100644 --- a/dlk/python/dlk/core/optimizer.py +++ b/dlk/python/dlk/core/optimizer.py @@ -334,7 +334,7 @@ def pass_compute_thresholds(graph: Graph) -> None: if (scaling_factor < 0) ^ (ch_id in trans_th['nega_idx']) \ else int(math.ceil(th_per_ch)) - # take care of threshold values that are larger than 16-bit signed integer + # take care of threshold values that are larger than 13-bit signed integer threshold_table[abs(threshold_table) > max_th_value] = max_th_value for c in range(ch): diff --git a/dlk/python/dlk/templates/Makefile.tpl b/dlk/python/dlk/templates/Makefile.tpl index b8c417f0d..14d5a2b8b 100644 --- a/dlk/python/dlk/templates/Makefile.tpl +++ b/dlk/python/dlk/templates/Makefile.tpl @@ -134,17 +134,17 @@ clean: -$(RM) $(OBJ) lm_x86: CXX = g++ -lm_x86: FLAGS += $(INCLUDES) -O3 -std=c++0x -g -DUSE_PNG -pthread -g -DFUNC_TIME_MEASUREMENT +lm_x86: FLAGS += $(INCLUDES) -O3 -std=c++0x -g -DUSE_PNG -pthread -g lm_aarch64: CXX = aarch64-linux-gnu-g++ -lm_aarch64: FLAGS += $(INCLUDES) -O3 -std=c++0x -g -DUSE_NEON -DUSE_PNG -pthread -g -DFUNC_TIME_MEASUREMENT +lm_aarch64: FLAGS += $(INCLUDES) -O3 -std=c++0x -g -DUSE_NEON -DUSE_PNG -pthread -g lm_arm: CXX = arm-linux-gnueabihf-g++ -lm_arm: FLAGS += $(INCLUDES) -std=c++0x -O3 -DUSE_NEON -DUSE_PNG -mcpu=cortex-a9 -mfpu=neon -mthumb -s -pthread -g -fopenmp -DFUNC_TIME_MEASUREMENT +lm_arm: FLAGS += $(INCLUDES) -std=c++0x -O3 -DUSE_NEON -DUSE_PNG -mcpu=cortex-a9 -mfpu=neon -mthumb -s -pthread -g -fopenmp lm_arm: CXXFLAGS += lm_fpga: CXX = arm-linux-gnueabihf-g++ -lm_fpga: FLAGS += $(INCLUDES) -std=c++0x -O3 -DUSE_NEON -DRUN_ON_FPGA -DUSE_PNG -mcpu=cortex-a9 -mfpu=neon -mthumb -s -pthread -g -fopenmp -DFUNC_TIME_MEASUREMENT +lm_fpga: FLAGS += $(INCLUDES) -std=c++0x -O3 -DUSE_NEON -DRUN_ON_FPGA -DUSE_PNG -mcpu=cortex-a9 -mfpu=neon -mthumb -s -pthread -g -fopenmp lm_fpga: CXXFLAGS += lib_x86: CXX = g++ From 9f01c686ddc66a1d49742781f98b6f1583d0c685 Mon Sep 17 00:00:00 2001 From: nlpng Date: Tue, 8 Jan 2019 11:28:50 +0900 Subject: [PATCH 26/45] remove redundant method and flags --- dlk/python/dlk/core/optimizer.py | 38 ---------------------- dlk/python/dlk/scripts/generate_project.py | 2 +- dlk/tests/test_optimizer.py | 2 +- 3 files changed, 2 insertions(+), 40 deletions(-) diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index 6d59e6554..0be25edf2 100644 --- a/dlk/python/dlk/core/optimizer.py 
+++ b/dlk/python/dlk/core/optimizer.py @@ -26,44 +26,6 @@ from modules.packer import Packer -def pass_dot_graph(graph: Graph, filename) -> None: - """Generate a GraphViz dot file from a given Graph. - - Parameters - ---------- - graph : Graph - The input graph to be converted into a dot script - - filename : str - The file where we want to save the dot script - - """ - dot_script = 'digraph {' - - code = {} - counter = 0 - for node in graph.operators: - code[node.name] = counter - counter += 1 - - for node in graph.operators: - - shape = '-' - if node.shape: - shape = 'x'.join(str(x) for x in node.shape) - shape += '(' + node.dimension + ')' - - dot_script += node.name + '[label=" ' + format(code[node.name], '04X') + '| ' + \ - node.op_type + '| ' + shape + '| ' + node.dtype.cpptype() + '" shape = "record"];' - for i in node.input_nodes: - dot_script += i.name + ' -> ' + node.name + ';' - - dot_script += '}' - - with open(filename, 'w') as f: - f.write(dot_script) - - def pass_remove_identities(graph: Graph) -> None: """Removes those nodes of a Graph that satisfies the condition node.op_type() == Identity. diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index ef3d21573..c19d3933b 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -31,7 +31,7 @@ from frontend import TensorFlowIO from core.optimizer import pass_remove_identities, pass_transpose, pass_precompute, \ pass_propagate_quantization_details_into_conv, pass_compute_thresholds, pass_pack_weights, \ - pass_quantize_convolutions, pass_propagate_datatypes, pass_propagate_output_type_backward, pass_dot_graph + pass_quantize_convolutions, pass_propagate_datatypes, pass_propagate_output_type_backward SCRITPS_DIR = path.abspath(path.dirname(__file__)) DLK_ROOT_DIR = path.abspath(path.join(SCRITPS_DIR, '..')) diff --git a/dlk/tests/test_optimizer.py b/dlk/tests/test_optimizer.py index 9e0d60db0..88cbfbeaa 100644 --- a/dlk/tests/test_optimizer.py +++ b/dlk/tests/test_optimizer.py @@ -18,7 +18,7 @@ from core.data_types import Float32, Uint32, Int32, QUANTIZED_NOT_PACKED from core.optimizer import pass_remove_identities, pass_transpose, pass_precompute, \ pass_propagate_quantization_details_into_conv, pass_compute_thresholds, pass_pack_weights, \ - pass_quantize_convolutions, pass_propagate_datatypes, pass_propagate_output_type_backward, pass_dot_graph + pass_quantize_convolutions, pass_propagate_datatypes, pass_propagate_output_type_backward from core.graph import Graph from core.operators import Add, AveragePool, BatchNormalization, Constant, Conv, Identity, Input, \ MaxPool, Operator, Output, Transpose, QTZ_binary_mean_scaling, QTZ_linear_mid_tread_half, Reshape, Softmax From 14cf8cc73701b7c1382f910da9783765f31dfec1 Mon Sep 17 00:00:00 2001 From: nlpng Date: Tue, 8 Jan 2019 13:42:38 +0900 Subject: [PATCH 27/45] Implement preserve_quantization in each operator --- dlk/python/dlk/core/operators.py | 48 +++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/dlk/python/dlk/core/operators.py b/dlk/python/dlk/core/operators.py index d662d64fd..de6f905d2 100644 --- a/dlk/python/dlk/core/operators.py +++ b/dlk/python/dlk/core/operators.py @@ -485,7 +485,9 @@ def infer_shape(cls, lists: Dict[str, List[int]], format: str, input_formats: Li @property def preserve_quantization(self) -> bool: - return False + """whether to preserve the operator for quantization""" + raise NotImplementedError( + f'Preservation 
for quantization of operator {self.op_type} is not defined.') class Variable(Operator): @@ -525,6 +527,10 @@ def data(self) -> np.ndarray: def data(self, val: np.ndarray) -> None: self._data = val + @property + def preserve_quantization(self) -> bool: + return False + class Input(Variable): """Input class. This is a placeholder.""" @@ -676,6 +682,10 @@ def max_v(self) -> float: def scaling_factor(self) -> np.float32: return self._scaling_factor + @property + def preserve_quantization(self) -> bool: + return False + @scaling_factor.setter def scaling_factor(self, val: np.float32) -> None: self._scaling_factor = val @@ -1324,6 +1334,10 @@ def infer_shape(cls, lists: Dict[str, List[int]], format: str, input_formats: Li def _dispatch_name(self) -> str: return "batch_normalization" + @property + def preserve_quantization(self) -> bool: + return False + class QTZ_linear_mid_tread_half(Quantizer): """Quantization operator with 'linear mid tread half'. @@ -1506,6 +1520,10 @@ def infer_shape(cls, lists: Dict[str, List[int]], format: str, input_formats: Li return output_shape + @property + def preserve_quantization(self) -> bool: + return False + class Pool(Operator): """Pooling operator. @@ -1634,6 +1652,10 @@ def infer_shape(cls, lists: Dict[str, List[int]], format: str, input_formats: Li perm = [format.index(s) for s in 'NCHW'] return [NCHW[i] for i in perm] + @property + def preserve_quantization(self) -> bool: + return False + class MaxPool(Pool): """Max pooling operator. @@ -1852,6 +1874,10 @@ def run_forward(self) -> np.ndarray: self._data = exp / np.expand_dims(exp.sum(axis=-1), -1) return self._data + @property + def preserve_quantization(self) -> bool: + return False + class Relu(Operator): """Relu class. @@ -1887,6 +1913,10 @@ def infer_shape(cls, lists: Dict[str, List[int]], format: str, input_formats: Li attrs: Dict[str, Any]) -> List[int]: return lists['X'] + @property + def preserve_quantization(self) -> bool: + return False + class Flatten(Operator): """Flatten class. @@ -2010,6 +2040,10 @@ def infer_shape(cls, lists: Dict[str, List[int]], format: str, input_formats: Li attrs: Dict[str, Any]) -> List[int]: return lists['data'] + @property + def preserve_quantization(self) -> bool: + return False + class Gemm(Operator): """Gemm operator. @@ -2096,6 +2130,10 @@ def infer_shape(cls, lists: Dict[str, List[int]], format: str, input_formats: Li return [M, N] + @property + def preserve_quantization(self) -> bool: + return False + class Mul(Operator): """Mul operator. @@ -2171,6 +2209,10 @@ def run_forward(self) -> np.ndarray: def is_monotonic(self) -> bool: return False + @property + def preserve_quantization(self) -> bool: + return False + @classmethod def infer_shape(cls, lists: Dict[str, List[int]], format: str, input_formats: List[str], attrs: Dict[str, Any]) -> List[int]: @@ -2369,6 +2411,10 @@ def _dispatch_name(self) -> str: def is_monotonic(self) -> bool: return False + @property + def preserve_quantization(self) -> bool: + return False + class DepthToSpace(Operator): """Depth to Space operator. 
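
Since the base-class property now raises NotImplementedError, any operator added later has to state explicitly whether quantized values survive it. A minimal sketch for a hypothetical operator (the class name is illustrative, not one of the operators in this patch set):

    from core.operators import Operator

    class ChannelShuffle(Operator):
        """Hypothetical operator that only re-orders data."""

        @property
        def preserve_quantization(self) -> bool:
            # pure data re-ordering keeps quantized values intact
            return True
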
From 74da1fd8ca9a3214d7b8b6c9c7a97bfbd8bc5fd8 Mon Sep 17 00:00:00 2001 From: nlpng Date: Tue, 8 Jan 2019 13:45:16 +0900 Subject: [PATCH 28/45] Move nega_idx aggregation into optimization pass --- dlk/python/dlk/core/operators.py | 2 -- dlk/python/dlk/core/optimizer.py | 8 ++++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/dlk/python/dlk/core/operators.py b/dlk/python/dlk/core/operators.py index de6f905d2..f4cd00b39 100644 --- a/dlk/python/dlk/core/operators.py +++ b/dlk/python/dlk/core/operators.py @@ -1294,8 +1294,6 @@ def run(self, **kwargs) -> Dict: mean = np.float64(self._input_ops['mean'].data) var = np.float64(self._input_ops['var'].data) - kwargs['nega_idx'] = [v for v in range(len(scale)) if scale[v] < 0] - x_norm = (kwargs['data'] - mean) / np.sqrt(var + self.epsilon) kwargs['data'] = scale * x_norm + beta return kwargs diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index 0be25edf2..642687981 100644 --- a/dlk/python/dlk/core/optimizer.py +++ b/dlk/python/dlk/core/optimizer.py @@ -281,19 +281,23 @@ def pass_compute_thresholds(graph: Graph) -> None: init_threshold = np.full(ch, th_v, dtype=np.float64) # run calculation in reverse order, for example, q -> bn -> scaling + bn_nega_idx = [] trans_th = {'data': init_threshold} for op in p[:-1]: trans_th = op.de_run(**trans_th) + if op.op_type == 'BatchNormalization': + bn_scale = op.input_ops['scale'].data + bn_nega_idx = [v for v in range(len(bn_scale)) if bn_scale[v] < 0] threshold = (trans_th['data'] * np.float64(n)) / (np.float64(max_v) * scaling_factor) for ch_id, th_per_ch in enumerate(threshold): if quantizer_conv_weights.op_type == 'QTZ_binary_channel_wise_mean_scaling': threshold_table[ch_id, th_id] = int(math.floor(th_per_ch)) \ - if (scaling_factor[ch_id] < 0) ^ (ch_id in trans_th['nega_idx']) \ + if (scaling_factor[ch_id] < 0) ^ (ch_id in bn_nega_idx) \ else int(math.ceil(th_per_ch)) else: threshold_table[ch_id, th_id] = int(math.floor(th_per_ch)) \ - if (scaling_factor < 0) ^ (ch_id in trans_th['nega_idx']) \ + if (scaling_factor < 0) ^ (ch_id in bn_nega_idx) \ else int(math.ceil(th_per_ch)) # take care of threshold values that are larger than 13-bit signed integer From 8417067ba6c88bcc12de6f58bed73fe1e4fa7696 Mon Sep 17 00:00:00 2001 From: Nikolay Nez Date: Tue, 8 Jan 2019 15:25:19 +0900 Subject: [PATCH 29/45] Move while loop inside of pass_precompute --- dlk/python/dlk/core/optimizer.py | 103 ++++++++++----------- dlk/python/dlk/scripts/generate_project.py | 4 +- dlk/tests/test_optimizer.py | 12 +-- 3 files changed, 55 insertions(+), 64 deletions(-) diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index 642687981..76a5e704d 100644 --- a/dlk/python/dlk/core/optimizer.py +++ b/dlk/python/dlk/core/optimizer.py @@ -91,7 +91,7 @@ def pass_transpose(graph: Graph) -> None: m.node.transpose(permutation) -def pass_precompute(graph: Graph, processed_nodes) -> bool: +def pass_precompute(graph: Graph) -> None: """Given a node N, if the value of each input of N is known at compilation time then N will be executed. The node N and its inputs will be replaced with a Constant node which holds the computed output of N. @@ -101,61 +101,60 @@ def pass_precompute(graph: Graph, processed_nodes) -> bool: The input graph. It will be modified in-place. processed_nodes : list The list of the processed nodes so far. - - Returns - ------- - result : bool - True if some nodes were precomputed, False otherwise. 
""" - p = Pattern('*') - matches = find_pattern(graph, p) - processed_before_precompute = len(processed_nodes) - to_be_removed = [] - - for m in matches: - if m.node in processed_nodes: - continue - - # We want operators with inputs - if not m.node.input_nodes: - continue - - precomputable = True - for input_node in m.node.input_nodes: - if input_node.op_type != 'Constant': - precomputable = False - - if not precomputable: - continue - - processed_nodes += m.node.input_nodes - processed_nodes.append(m.node) - data = m.node.run_forward() - - new_constant = Constant( - m.node.name + '_new', - m.node.dtype, - data, - dimension_format=m.node.dimension - ) - graph.add_op(new_constant) - - # get nodes to be removed after being disconnected - get_nodes_in_branch(m.node, None, to_be_removed) - - new_constant.add_outputs({'output': m.node.output_ops.values()}) - for output_name, consumer_list in m.node.output_ops.items(): - for consumer_node in consumer_list: - for input_name, input_node in consumer_node.input_ops.items(): - if input_node == m.node: - consumer_node.add_input(input_name, new_constant) - break + done = False + processed_nodes = [] + while not done: + p = Pattern('*') + matches = find_pattern(graph, p) + processed_before_precompute = len(processed_nodes) + to_be_removed = [] + + for m in matches: + if m.node in processed_nodes: + continue + + # We want operators with inputs + if not m.node.input_nodes: + continue + + precomputable = True + for input_node in m.node.input_nodes: + if input_node.op_type != 'Constant': + precomputable = False + + if not precomputable: + continue + + processed_nodes += m.node.input_nodes + processed_nodes.append(m.node) + + data = m.node.run_forward() + + new_constant = Constant( + m.node.name + '_new', + m.node.dtype, + data, + dimension_format=m.node.dimension + ) + graph.add_op(new_constant) + + # get nodes to be removed after being disconnected + get_nodes_in_branch(m.node, None, to_be_removed) + + new_constant.add_outputs({'output': m.node.output_ops.values()}) + for output_name, consumer_list in m.node.output_ops.items(): + for consumer_node in consumer_list: + for input_name, input_node in consumer_node.input_ops.items(): + if input_node == m.node: + consumer_node.add_input(input_name, new_constant) + break - for op in to_be_removed: - graph.remove_op(op) + for op in to_be_removed: + graph.remove_op(op) - return len(processed_nodes) > processed_before_precompute + done = len(processed_nodes) == processed_before_precompute def pass_propagate_quantization_details_into_conv(graph: Graph) -> None: diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index c19d3933b..74b7f0d54 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -65,9 +65,7 @@ def optimize_graph_step(model: Model, config: Config) -> None: pass_propagate_output_type_backward(graph) pass_propagate_datatypes(graph) - processed_nodes = [] - while pass_precompute(graph, processed_nodes=processed_nodes): - pass + pass_precompute(graph) def generate_code_step(model: Model, config: Config) -> None: diff --git a/dlk/tests/test_optimizer.py b/dlk/tests/test_optimizer.py index 88cbfbeaa..dc46a8527 100644 --- a/dlk/tests/test_optimizer.py +++ b/dlk/tests/test_optimizer.py @@ -41,9 +41,7 @@ def test_precompute1(self) -> None: pass_remove_identities(graph1) pass_transpose(graph1) - processed_nodes = [] - while pass_precompute(graph1, processed_nodes=processed_nodes): - pass + pass_precompute(graph1) 
self.assertEqual(graph1, graph2, 'precompute failed.') @@ -66,9 +64,7 @@ def test_precompute2(self) -> None: pass_propagate_datatypes(graph1) - processed_nodes = [] - while pass_precompute(graph1, processed_nodes=processed_nodes): - pass + pass_precompute(graph1) self.assertEqual(graph1, graph2, 'precompute failed.') self.assertAlmostEqual(graph1.get_op('conv2').quantizer.scaling_factor, scaling2) # type: ignore @@ -92,9 +88,7 @@ def test_precompute3(self) -> None: pass_propagate_datatypes(graph1) - processed_nodes = [] - while pass_precompute(graph1, processed_nodes=processed_nodes): - pass + pass_precompute(graph1) self.assertEqual(graph1, graph2, 'precompute failed.') self.assertAlmostEqual(graph1.get_op('conv2').quantizer.scaling_factor, scaling2) # type: ignore From a0d4a33ebbe7f22898ebfee98199092eee2edac8 Mon Sep 17 00:00:00 2001 From: Nikolay Nez Date: Tue, 8 Jan 2019 15:27:46 +0900 Subject: [PATCH 30/45] Rename pass_precompute to pass_constant_folding --- dlk/python/dlk/core/optimizer.py | 2 +- dlk/python/dlk/scripts/generate_project.py | 4 ++-- dlk/tests/test_optimizer.py | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index 76a5e704d..9de82dfdd 100644 --- a/dlk/python/dlk/core/optimizer.py +++ b/dlk/python/dlk/core/optimizer.py @@ -91,7 +91,7 @@ def pass_transpose(graph: Graph) -> None: m.node.transpose(permutation) -def pass_precompute(graph: Graph) -> None: +def pass_constant_folding(graph: Graph) -> None: """Given a node N, if the value of each input of N is known at compilation time then N will be executed. The node N and its inputs will be replaced with a Constant node which holds the computed output of N. diff --git a/dlk/python/dlk/scripts/generate_project.py b/dlk/python/dlk/scripts/generate_project.py index 74b7f0d54..b37982dd2 100644 --- a/dlk/python/dlk/scripts/generate_project.py +++ b/dlk/python/dlk/scripts/generate_project.py @@ -29,7 +29,7 @@ from core.params import Params from code_generater import CodeGenerater from frontend import TensorFlowIO -from core.optimizer import pass_remove_identities, pass_transpose, pass_precompute, \ +from core.optimizer import pass_remove_identities, pass_transpose, pass_constant_folding, \ pass_propagate_quantization_details_into_conv, pass_compute_thresholds, pass_pack_weights, \ pass_quantize_convolutions, pass_propagate_datatypes, pass_propagate_output_type_backward @@ -65,7 +65,7 @@ def optimize_graph_step(model: Model, config: Config) -> None: pass_propagate_output_type_backward(graph) pass_propagate_datatypes(graph) - pass_precompute(graph) + pass_constant_folding(graph) def generate_code_step(model: Model, config: Config) -> None: diff --git a/dlk/tests/test_optimizer.py b/dlk/tests/test_optimizer.py index dc46a8527..e05015346 100644 --- a/dlk/tests/test_optimizer.py +++ b/dlk/tests/test_optimizer.py @@ -16,7 +16,7 @@ """Test file for Optimizer.""" import unittest from core.data_types import Float32, Uint32, Int32, QUANTIZED_NOT_PACKED -from core.optimizer import pass_remove_identities, pass_transpose, pass_precompute, \ +from core.optimizer import pass_remove_identities, pass_transpose, pass_constant_folding, \ pass_propagate_quantization_details_into_conv, pass_compute_thresholds, pass_pack_weights, \ pass_quantize_convolutions, pass_propagate_datatypes, pass_propagate_output_type_backward from core.graph import Graph @@ -41,7 +41,7 @@ def test_precompute1(self) -> None: pass_remove_identities(graph1) pass_transpose(graph1) - 
pass_precompute(graph1) + pass_constant_folding(graph1) self.assertEqual(graph1, graph2, 'precompute failed.') @@ -64,7 +64,7 @@ def test_precompute2(self) -> None: pass_propagate_datatypes(graph1) - pass_precompute(graph1) + pass_constant_folding(graph1) self.assertEqual(graph1, graph2, 'precompute failed.') self.assertAlmostEqual(graph1.get_op('conv2').quantizer.scaling_factor, scaling2) # type: ignore @@ -88,7 +88,7 @@ def test_precompute3(self) -> None: pass_propagate_datatypes(graph1) - pass_precompute(graph1) + pass_constant_folding(graph1) self.assertEqual(graph1, graph2, 'precompute failed.') self.assertAlmostEqual(graph1.get_op('conv2').quantizer.scaling_factor, scaling2) # type: ignore From 2a20dba37b6d289e93fdcf2a30ca61cdd541fdb8 Mon Sep 17 00:00:00 2001 From: nlpng Date: Tue, 8 Jan 2019 15:32:21 +0900 Subject: [PATCH 31/45] rename find_input to output_dtype_changer --- dlk/python/dlk/core/optimizer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index 9de82dfdd..b4416358a 100644 --- a/dlk/python/dlk/core/optimizer.py +++ b/dlk/python/dlk/core/optimizer.py @@ -462,14 +462,14 @@ def pass_propagate_output_type_backward(graph: Graph) -> None: p = Pattern('*') matches = find_pattern(graph, p) - def find_input(node, otype): + def output_dtype_changer(node, otype): for n in node.input_nodes: if n.op_type == 'Conv' and n.is_quantized: n.dtype = otype return - find_input(n, otype) + output_dtype_changer(n, otype) # propagate output data type to the last quantized convolution output_node = matches[-1].node output_type = output_node.dtype - find_input(output_node, output_type) + output_dtype_changer(output_node, output_type) From fb51eee0a037d619c5874b32bc0af05396b39a78 Mon Sep 17 00:00:00 2001 From: nlpng Date: Fri, 11 Jan 2019 10:02:50 +0900 Subject: [PATCH 32/45] remove some redundant lines --- dlk/python/dlk/core/operators.py | 2 -- dlk/python/dlk/core/optimizer.py | 8 ++++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/dlk/python/dlk/core/operators.py b/dlk/python/dlk/core/operators.py index f4cd00b39..e54090d4f 100644 --- a/dlk/python/dlk/core/operators.py +++ b/dlk/python/dlk/core/operators.py @@ -1304,8 +1304,6 @@ def de_run(self, **kwargs) -> Dict: mean = np.float64(self._input_ops['mean'].data) var = np.float64(self._input_ops['var'].data) - kwargs['nega_idx'] = [v for v in range(len(scale)) if scale[v] < 0] - kwargs['data'] = (((kwargs['data'] - beta) / scale) * np.sqrt(var + self.epsilon)) + mean return kwargs diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index b4416358a..0f133aa21 100644 --- a/dlk/python/dlk/core/optimizer.py +++ b/dlk/python/dlk/core/optimizer.py @@ -243,7 +243,7 @@ def pass_compute_thresholds(graph: Graph) -> None: if p[-1].op_type != 'Conv': continue - quantizer_conv_output_node = p[0] + activation_quantizer_node = p[0] conv_node = p[-1] # check if this is a quantized convolution @@ -313,13 +313,13 @@ def pass_compute_thresholds(graph: Graph) -> None: conv_node.thresholds = threshold_table.flatten().tolist() # get nodes to be removed after being disconnected - get_nodes_in_branch(quantizer_conv_output_node, conv_node, to_be_removed) + get_nodes_in_branch(activation_quantizer_node, conv_node, to_be_removed) # Disconnect the outputs of the quantizer - out_ops = quantizer_conv_output_node.output_ops['output'] + out_ops = activation_quantizer_node.output_ops['output'] for output_node in out_ops: for input_name, 
input_node in output_node.input_ops.items(): - if input_node == quantizer_conv_output_node: + if input_node == activation_quantizer_node: output_node.add_input(input_name, conv_node) # Disconnect the outputs of the conv From 8dd88be40c2bdad4e7aedc77449b89c68cf86d7e Mon Sep 17 00:00:00 2001 From: Antonio Date: Fri, 11 Jan 2019 16:11:40 +0900 Subject: [PATCH 33/45] Simplify the passes code --- dlk/python/dlk/core/graph_pattern_matching.py | 101 +------------- dlk/python/dlk/core/optimizer.py | 125 ++++++++---------- 2 files changed, 57 insertions(+), 169 deletions(-) diff --git a/dlk/python/dlk/core/graph_pattern_matching.py b/dlk/python/dlk/core/graph_pattern_matching.py index 306cb6d6b..6eb328746 100644 --- a/dlk/python/dlk/core/graph_pattern_matching.py +++ b/dlk/python/dlk/core/graph_pattern_matching.py @@ -13,47 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================= -"""Graph pattern matching module.""" - - -class Pattern: - """Pattern is a sub-graph based on the operator types. - It is a recursive pattern where a Pattern holds a operator type and a list of inputs. - Each input in this list is also a Pattern. - """ - def __init__(self, op=str(), inputs=list()): - self.op = op - self.inputs = inputs - - -class NodeMatch: - """NodeMatch defines a sub-graph that match a given Pattern. - It is a recursive pattern where a NodeMatch holds a reference to the matched node and a list of inputs. - Each input in this list is also a NodeMatch. - """ - def __init__(self): - self.node = None - self.inputs = list() - - -def find_pattern(graph, pattern): - """Helper function that find a pattern in a graph. - - Parameters - ---------- - graph : Graph - The input graph where we will try to find the given pattern. - - pattern : Pattern - The pattern we want to look for. - - Returns - ------- - result : [NodeMatch] - A list of matches. Each element of the list is a NodeMatch. - """ - gm = GraphMatcher(graph) - return gm.get_op_type_matches(pattern) +"""Graph sorting helper functions.""" def sort_graph(graph): @@ -136,62 +96,3 @@ def get_nodes_in_branch(starting_node, stop_node, node_list): for node in starting_node.input_nodes: get_nodes_in_branch(node, stop_node, node_list) - - -class GraphMatcher: - """GraphMatcher is used to find sub-graphs in the computational graph. 
- """ - def __init__(self, input_graph): - self.graph_node_list = list() - self.graph_node_list = sort_graph(input_graph) - - self._node_map = {node.name: node for node in self.graph_node_list} - - def record_matched_nodes(self, match, matched_nodes): - matched_nodes.add(match.node.name) - for input_node in match.inputs: - self.record_matched_nodes(input_node, matched_nodes) - - def get_op_type_matches(self, pattern): - matches = list() - matched_nodes = set() - for node in self.graph_node_list: - if node in matched_nodes: - continue - - match = NodeMatch() - if self.does_op_type_match(node, pattern, matched_nodes, match): - self.record_matched_nodes(match, matched_nodes) - matches.append(match) - return matches - - def does_op_type_match(self, node, pattern, previously_matched_nodes, match): - if node.name in previously_matched_nodes: - return False - - pattern_matched = False - if pattern.op == '*': - pattern_matched = True - else: - for pattern_op in pattern.op.split('|'): - if node.op_type == pattern_op: - pattern_matched = True - if not pattern_matched: - return False - - match.node = node - if not pattern.inputs: - return True - if len(node.input_nodes) != len(pattern.inputs): - return False - - for i in range(len(pattern.inputs)): - input_node = self._node_map[node.input_nodes[i].name] - input_pattern = pattern.inputs[i] - input_match = NodeMatch() - match.inputs.append(input_match) - - if not self.does_op_type_match(input_node, input_pattern, previously_matched_nodes, input_match): - return False - - return True diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index 0f133aa21..7b4484783 100644 --- a/dlk/python/dlk/core/optimizer.py +++ b/dlk/python/dlk/core/optimizer.py @@ -18,7 +18,7 @@ import numpy as np from core.graph import Graph -from core.graph_pattern_matching import find_pattern, Pattern, get_nodes_in_branch +from core.graph_pattern_matching import get_nodes_in_branch, sort_graph from core.operators import Constant, Operator from core.data_types import Uint32, QUANTIZED_NOT_PACKED from typing import cast @@ -35,28 +35,26 @@ def pass_remove_identities(graph: Graph) -> None: The input graph. It will be modified in-place. """ - p = Pattern("Identity") - matches = find_pattern(graph, p) + exec_list = [n for n in sort_graph(graph) if n.op_type == 'Identity'] to_be_removed = list() - - for m in matches: + for m in exec_list: """skip all identity.""" - in_op = m.node.input_ops['input'] - out_ops = m.node.output_ops['output'] + in_op = m.input_ops['input'] + out_ops = m.output_ops['output'] for out_op in out_ops: for k, v in out_op.input_ops.items(): - if v == m.node: + if v == m: # change the output's input to this identity's input out_op.add_input(k, in_op) # change the input's output to this identity's output for k2, v2 in in_op.output_ops.items(): - if m.node in v2: - v2.remove(m.node) + if m in v2: + v2.remove(m) v2.append(out_op) break break - to_be_removed.append(m.node) + to_be_removed.append(m) for op in to_be_removed: graph.remove_op(op) @@ -75,12 +73,11 @@ def pass_transpose(graph: Graph) -> None: The input graph. It will be modified in-place. 
""" - p = Pattern("*") - matches = find_pattern(graph, p) + exec_list = sort_graph(graph) - for m in matches: - dim = m.node.dimension - shape = m.node.shape + for m in exec_list: + dim = m.dimension + shape = m.shape if len(shape) != 4 or len(dim) != 4 or not set(dim).issubset({'N', 'H', 'W', 'C', 'I', 'O'}): continue @@ -88,7 +85,7 @@ def pass_transpose(graph: Graph) -> None: dim = dim.replace('O', 'N') permutation = list(map(lambda s: dim.index(s), 'NHWC')) - m.node.transpose(permutation) + m.transpose(permutation) def pass_constant_folding(graph: Graph) -> None: @@ -106,48 +103,47 @@ def pass_constant_folding(graph: Graph) -> None: done = False processed_nodes = [] while not done: - p = Pattern('*') - matches = find_pattern(graph, p) + exec_list = sort_graph(graph) processed_before_precompute = len(processed_nodes) to_be_removed = [] - for m in matches: - if m.node in processed_nodes: + for m in exec_list: + if m in processed_nodes: continue # We want operators with inputs - if not m.node.input_nodes: + if not m.input_nodes: continue precomputable = True - for input_node in m.node.input_nodes: + for input_node in m.input_nodes: if input_node.op_type != 'Constant': precomputable = False if not precomputable: continue - processed_nodes += m.node.input_nodes - processed_nodes.append(m.node) + processed_nodes += m.input_nodes + processed_nodes.append(m) - data = m.node.run_forward() + data = m.run_forward() new_constant = Constant( - m.node.name + '_new', - m.node.dtype, + m.name + '_new', + m.dtype, data, - dimension_format=m.node.dimension + dimension_format=m.dimension ) graph.add_op(new_constant) # get nodes to be removed after being disconnected - get_nodes_in_branch(m.node, None, to_be_removed) + get_nodes_in_branch(m, None, to_be_removed) - new_constant.add_outputs({'output': m.node.output_ops.values()}) - for output_name, consumer_list in m.node.output_ops.items(): + new_constant.add_outputs({'output': m.output_ops.values()}) + for output_name, consumer_list in m.output_ops.items(): for consumer_node in consumer_list: for input_name, input_node in consumer_node.input_ops.items(): - if input_node == m.node: + if input_node == m: consumer_node.add_input(input_name, new_constant) break @@ -177,8 +173,7 @@ def pass_propagate_quantization_details_into_conv(graph: Graph) -> None: graph : Graph The input graph. It will be modified in-place. 
""" - p = Pattern('*') - matches = find_pattern(graph, p) + exec_list = sort_graph(graph) qtypes = [ 'QTZ_binary_mean_scaling', 'QTZ_linear_mid_tread_half', @@ -186,29 +181,29 @@ def pass_propagate_quantization_details_into_conv(graph: Graph) -> None: ] quant_details = defaultdict(list) - for m in matches: - if not m.node.preserve_quantization: - quant_details[m.node.name] = [] + for m in exec_list: + if not m.preserve_quantization: + quant_details[m.name] = [] continue - if m.node.op_type == 'Conv': - input_node = m.node.input_nodes[0] - weight_node = m.node.input_nodes[1] + if m.op_type == 'Conv': + input_node = m.input_nodes[0] + weight_node = m.input_nodes[1] - m.node.a_quantizer = [input_node] if input_node.op_type in qtypes else quant_details[input_node.name] - m.node.quantizer = weight_node if weight_node.op_type in qtypes else quant_details[weight_node.name] + m.a_quantizer = [input_node] if input_node.op_type in qtypes else quant_details[input_node.name] + m.quantizer = weight_node if weight_node.op_type in qtypes else quant_details[weight_node.name] - quant_details[m.node.name] = [] + quant_details[m.name] = [] else: qtzs = [] - for n in m.node.input_nodes: + for n in m.input_nodes: if n.op_type in qtypes: qtzs.append(n) else: for q in quant_details[n.name]: qtzs.append(q) - quant_details[m.node.name] = qtzs if len(qtzs) == len(m.node.input_nodes) else [] + quant_details[m.name] = qtzs if len(qtzs) == len(m.input_nodes) else [] # TODO: check if the quantizers use same n_bits @@ -226,13 +221,11 @@ def pass_compute_thresholds(graph: Graph) -> None: graph : Graph The input graph. It will be modified in-place. """ - p = Pattern('QTZ_linear_mid_tread_half') - matches = find_pattern(graph, p) - + exec_list = [n for n in sort_graph(graph) if n.op_type == 'QTZ_linear_mid_tread_half'] to_be_removed = [] - for m in matches: + for m in exec_list: # find a a backward path between the quantizer and the convolution ie. a path represented by a list [Q, ..., C] - p = [m.node] + p = [m] while p[-1].op_type != 'Conv': non_variable_input = [inode for inode in p[-1].input_nodes if (not cast(Operator, inode).is_variable and inode.is_monotonic) @@ -340,8 +333,7 @@ def pass_pack_weights(graph: Graph) -> None: graph : Graph The input graph. It will be modified in-place. """ - p = Pattern('Conv') - matches = find_pattern(graph, p) + exec_list = [n for n in sort_graph(graph) if n.op_type == 'Conv'] quantization_types = [ 'QTZ_binary_mean_scaling', 'QTZ_linear_mid_tread_half', @@ -353,8 +345,8 @@ def pass_pack_weights(graph: Graph) -> None: packer = Packer(weight_bitwidth, word_size) to_be_removed = [] - for m in matches: - conv_node = m.node + for m in exec_list: + conv_node = m # check if this is a quantized convolution if not conv_node.quantizer or not conv_node.a_quantizer: @@ -406,11 +398,9 @@ def pass_quantize_convolutions(graph: Graph) -> None: graph : Graph The input graph. It will be modified in-place. """ - p = Pattern('Conv') - matches = find_pattern(graph, p) - - for m in matches: - conv_node = m.node + exec_list = [n for n in sort_graph(graph) if n.op_type == 'Conv'] + for m in exec_list: + conv_node = m # check if this is a quantized convolution if not conv_node.quantizer or not conv_node.a_quantizer: @@ -437,12 +427,10 @@ def pass_propagate_datatypes(graph) -> None: graph : Graph The input graph. It will be modified in-place. 
""" - p = Pattern('*') - matches = find_pattern(graph, p) - - for m in matches: - if m.node.op_type != 'Conv' and m.node.preserve_quantization: - m.node.dtype = m.node.input_nodes[0].dtype + exec_list = sort_graph(graph) + for m in exec_list: + if m.op_type != 'Conv' and m.preserve_quantization: + m.dtype = m.input_nodes[0].dtype def pass_propagate_output_type_backward(graph: Graph) -> None: @@ -459,8 +447,7 @@ def pass_propagate_output_type_backward(graph: Graph) -> None: graph : Graph The input graph. It will be modified in-place. """ - p = Pattern('*') - matches = find_pattern(graph, p) + exec_list = sort_graph(graph) def output_dtype_changer(node, otype): for n in node.input_nodes: @@ -470,6 +457,6 @@ def output_dtype_changer(node, otype): output_dtype_changer(n, otype) # propagate output data type to the last quantized convolution - output_node = matches[-1].node + output_node = exec_list[-1] output_type = output_node.dtype output_dtype_changer(output_node, output_type) From 406178db26d68036789383c57bf569eb9d797505 Mon Sep 17 00:00:00 2001 From: nlpng Date: Wed, 16 Jan 2019 19:31:20 +0900 Subject: [PATCH 34/45] Add new tests for optimization passes --- dlk/tests/test_optimizer.py | 914 +++++------------------------------- 1 file changed, 129 insertions(+), 785 deletions(-) diff --git a/dlk/tests/test_optimizer.py b/dlk/tests/test_optimizer.py index e05015346..0c0555cdf 100644 --- a/dlk/tests/test_optimizer.py +++ b/dlk/tests/test_optimizer.py @@ -27,880 +27,224 @@ from typing import Tuple -class TestOptimizer(unittest.TestCase): - """Test class for GraphRunner.""" +class TestPassTranspose(unittest.TestCase): + """Test class for transposing pass.""" - def test_precompute1(self) -> None: - """Test code for precompute optimizer.""" - data1 = np.random.rand(3, 2, 2, 3) - data2 = np.random.rand(3, 2, 2, 3) - data3 = np.random.rand(3, 2, 2, 3) - graph1 = self.create_sample_graph(data1, data2, data3) - graph2 = self.create_precompute_graph(data1, data2, data3) - - pass_remove_identities(graph1) - pass_transpose(graph1) - - pass_constant_folding(graph1) - - self.assertEqual(graph1, graph2, 'precompute failed.') - - print("Precompute test #1 passed!") - - def test_precompute2(self) -> None: - """Test code for precompute optimizer.""" - data1 = np.random.rand(3, 2, 2, 3) - data2 = np.random.rand(3, 2, 2, 3) - data3 = np.random.rand(3, 2, 2, 3) - graph1 = self.create_sample_graph(data1, data2, data3) - graph2, scaling1, scaling2 = self.create_quantized_graph(data1, data2, data3) - - pass_remove_identities(graph1) - pass_transpose(graph1) - - pass_propagate_quantization_details_into_conv(graph1) - pass_pack_weights(graph1) - pass_quantize_convolutions(graph1) - - pass_propagate_datatypes(graph1) - - pass_constant_folding(graph1) - - self.assertEqual(graph1, graph2, 'precompute failed.') - self.assertAlmostEqual(graph1.get_op('conv2').quantizer.scaling_factor, scaling2) # type: ignore - - print("Precompute test #2 passed!") - - def test_precompute3(self) -> None: - """Test code for precompute optimizer.""" - data1 = np.random.rand(3, 2, 2, 3) - data2 = np.random.rand(3, 2, 2, 3) - data3 = np.random.rand(3, 2, 2, 3) - graph1 = self.create_sample_graph3(data1, data2, data3) - graph2, scaling2, scaling3 = self.create_quantized_graph2(data1, data2, data3) - - pass_remove_identities(graph1) - pass_transpose(graph1) - - pass_propagate_quantization_details_into_conv(graph1) - pass_pack_weights(graph1) - pass_quantize_convolutions(graph1) - - pass_propagate_datatypes(graph1) - - 
pass_constant_folding(graph1) - - self.assertEqual(graph1, graph2, 'precompute failed.') - self.assertAlmostEqual(graph1.get_op('conv2').quantizer.scaling_factor, scaling2) # type: ignore - self.assertAlmostEqual(graph1.get_op('conv3').quantizer.scaling_factor, scaling3) # type: ignore - - print("Precompute test #3 passed!") - - def test_transpose_NHWC(self) -> None: - """Test code for transpose_NHWC optimizer.""" - data = np.random.rand(3, 2, 2, 1) - graph1 = self.create_sample_graph2(data) - graph2 = self.create_transposed_graph(data) + def test_pass_transpose(self) -> None: + """Test code for transposing optimizer pass.""" + data1 = np.random.rand(3, 2, 2, 1) + graph1 = self.create_sample_graph(data1) + graph2 = self.create_expected_graph(data1) pass_transpose(graph1) self.assertEqual(graph1, graph2, 'transpose to NHWC failed.') - print("Transpose_NHWC test #1 passed!") + print("Test transpose #1 pass passed!") - def create_sample_graph(self, data1: np.ndarray, data2: np.ndarray, data3: np.ndarray) -> Graph: + @staticmethod + def create_sample_graph(data: np.ndarray) -> Graph: graph = Graph() # input - x = Input( - 'placeholder', - [1, 5, 5, 3], - Float32(), - ) + x = Input('placeholder', [3, 5, 5, 1], Float32(), dimension_format='CWHN') # constant and internal nodes - w = Constant( - 'weight', - Float32(), - data1 - ) - - i = Identity( - 'identity1', - [3, 2, 2, 3], - Float32(), - {'input': w} - ) - - t = Transpose( - 'transpose1', - [3, 2, 2, 3], - Float32(), - {'data': i}, - perm=[3, 2, 1, 0] - ) - - q = QTZ_binary_mean_scaling( - 'qtz1', - [3, 2, 2, 3], - Float32(), - {'input': t} - ) + w = Constant('weight', Float32(), data, dimension_format='CWHN') + i1 = Identity('identity1', [3, 2, 2, 1], Float32(), {'input': w}, dimension_format='CWHN') + q = QTZ_binary_mean_scaling('qtz1', [3, 2, 2, 1], Float32(), {'input': i1}, dimension_format='CWHN') # Conv - conv1 = Conv( - 'conv1', - [1, 4, 4, 3], - Float32(), - {'X': x, 'W': q}, - kernel_shape=[2, 2] - ) - - i2 = Identity( - 'identity2', - [1, 4, 4, 3], - Float32(), - {'input': conv1} - ) - - s1 = Constant( - 'aq_const1', - Float32(), - np.array(1) - ) - - s2 = Constant( - 'aq_const2', - Float32(), - np.array(2) - ) - - aq = QTZ_linear_mid_tread_half( - 'aqtz1', - [1, 4, 4, 3], - Float32(), - {'X': i2, 'Y': s1, 'Z': s2} - ) - - dummy = Transpose( - 'dummy', - [1, 4, 4, 3], - Float32(), - {'data': aq}, - perm=[0, 1, 2, 3] - ) - - w2 = Constant( - 'weight2', - Float32(), - data2 - ) - - q2 = QTZ_binary_mean_scaling( - 'qtz2', - [3, 2, 2, 3], - Float32(), - {'input': w2} - ) - - conv2 = Conv( - 'conv2', - [1, 3, 3, 3], - Float32(), - {'X': dummy, 'W': q2}, - kernel_shape=[2, 2] - ) - - s3 = Constant( - 'aq_const1', - Float32(), - np.array(1) - ) - - s4 = Constant( - 'aq_const2', - Float32(), - np.array(2) - ) - - aq2 = QTZ_linear_mid_tread_half( - 'aqtz2', - [1, 3, 3, 3], - Float32(), - {'X': conv2, 'Y': s3, 'Z': s4} - ) - - w3 = Constant( - 'weight3', - Float32(), - data3 - ) - - i3 = Identity( - 'identity3', - [1, 3, 3, 3], - Float32(), - {'input': aq2} - ) - - conv3 = Conv( - 'conv3', - [1, 2, 2, 3], - Float32(), - {'X': i3, 'W': w3}, - kernel_shape=[2, 2] - ) + conv = Conv('conv', [3, 4, 4, 1], Float32(), {'X': x, 'W': q}, kernel_shape=[2, 2], dimension_format='CWHN') + + rs = Reshape('reshape', [1, 48], Float32(), {'data': conv}) # One output - y = Output( - 'output', - [1, 2, 2, 3], - Float32(), - {'input': conv3} - ) + y = Output('output', [1, 48], Float32(), {'input': rs},) # add ops to the graph graph.add_op_and_inputs(y) return 
graph - def binary_mean_scaling(self, data: np.ndarray) -> Tuple[np.float32, np.ndarray]: - return np.mean(np.abs(data)), np.sign(data).astype(np.float32) - - def create_precompute_graph(self, data1: np.ndarray, data2: np.ndarray, data3: np.ndarray) -> Graph: + @staticmethod + def create_expected_graph(data: np.ndarray) -> Graph: graph = Graph() - # two inputs - x = Input( - 'placeholder', - [1, 5, 5, 3], - Float32(), - ) + data = data.transpose([3, 2, 1, 0]) + + # input + x = Input('placeholder', [1, 5, 5, 3], Float32(), dimension_format='NHWC') - scaling1, qdata = self.binary_mean_scaling(data1.transpose([3, 2, 1, 0])) - w = Constant( - 'weight', - Float32(), - qdata * scaling1 - ) + # constant and internal nodes + w = Constant('weight', Float32(), data, dimension_format='NHWC') + i = Identity('identity1', [1, 2, 2, 3], Float32(), {'input': w}, dimension_format='NHWC') + q = QTZ_binary_mean_scaling('qtz1', [1, 2, 2, 3], Float32(), {'input': i}, dimension_format='NHWC') # Conv - conv1 = Conv( - 'conv1', - [1, 4, 4, 3], - Float32(), - {'X': x, 'W': w}, - kernel_shape=[2, 2] - ) - - s1 = Constant( - 'aq_const1', - Float32(), - np.array(1) - ) - - s2 = Constant( - 'aq_const2', - Float32(), - np.array(2) - ) - - aq = QTZ_linear_mid_tread_half( - 'aqtz1', - [1, 4, 4, 3], - Float32(), - {'X': conv1, 'Y': s1, 'Z': s2} - ) - - dummy = Transpose( - 'dummy', - [1, 4, 4, 3], - Float32(), - {'data': aq}, - perm=[0, 1, 2, 3] - ) - - scaling2, qdata2 = self.binary_mean_scaling(data2) - w2 = Constant( - 'weight2', - Float32(), - qdata2 * scaling2 - ) - - conv2 = Conv( - 'conv2', - [1, 3, 3, 3], - Float32(), - {'X': dummy, 'W': w2}, - kernel_shape=[2, 2] - ) - - s3 = Constant( - 'aq_const1', - Float32(), - np.array(1) - ) - - s4 = Constant( - 'aq_const2', - Float32(), - np.array(2) - ) - - aq2 = QTZ_linear_mid_tread_half( - 'aqtz2', - [1, 3, 3, 3], - Float32(), - {'X': conv2, 'Y': s3, 'Z': s4} - ) - - w3 = Constant( - 'weight3', - Float32(), - data3 - ) - - conv3 = Conv( - 'conv3', - [1, 2, 2, 3], - Float32(), - {'X': aq2, 'W': w3}, - kernel_shape=[2, 2] - ) + conv = Conv('conv', [1, 4, 4, 3], Float32(), {'X': x, 'W': q}, kernel_shape=[2, 2], dimension_format='NHWC') + + rs = Reshape('reshape', [1, 48], Float32(), {'data': conv}) # One output - y = Output( - 'output', - [1, 2, 2, 3], - Float32(), - {'input': conv3} - ) + y = Output('output', [1, 48], Float32(), {'input': rs},) # add ops to the graph graph.add_op_and_inputs(y) return graph - def create_quantized_graph(self, data: np.ndarray, data2: np.ndarray, data3: np.ndarray) \ - -> Tuple[Graph, np.float32, np.float32]: - graph = Graph() - - # two inputs - x = Input( - 'placeholder', - [1, 5, 5, 3], - Float32(), - ) - from modules.packer import Packer - packer = Packer(1, 32) - data = data.transpose([3, 2, 1, 0]) - scaling, qdata = self.binary_mean_scaling(data) - shape = list(data.shape) - w = Constant( - 'weight', - Float32(), - qdata * scaling, - ) - - q = QTZ_binary_mean_scaling( - 'qtz1', - shape, - Float32(), - {'input': w} - ) - q.scaling_factor = scaling +class TestPassRemoveIdentities(unittest.TestCase): + """Test class for removing identity pass.""" - # Conv - conv1 = Conv( - 'conv1', - [1, 4, 4, 3], - Float32(), - {'X': x, 'W': w}, - kernel_shape=[2, 2], - ) - - s1 = Constant( - 'aq_const1', - Float32(), - np.array(1) - ) - - s2 = Constant( - 'aq_const2', - Float32(), - np.array(2) - ) - - aq = QTZ_linear_mid_tread_half( - 'aqtz1', - [1, 4, 4, 3], - QUANTIZED_NOT_PACKED(), - {'X': conv1, 'Y': s1, 'Z': s2} - ) - - dummy = Transpose( - 
'dummy', - [1, 4, 4, 3], - QUANTIZED_NOT_PACKED(), - {'data': aq}, - perm=[0, 1, 2, 3] - ) - - scaling2, qdata2 = self.binary_mean_scaling(data2) - w2 = Constant( - 'weight2', - Uint32(), - packer.run(qdata2), - packed=True, - actual_shape=[3, 2, 2, 3] - ) - - # quantizer connected to conv2 as 'conv2.quantizer' - q2 = QTZ_binary_mean_scaling( - 'qtz2', - [3, 2, 2, 3], - Uint32(), - {'input': w2} - ) - q2.scaling_factor = scaling2 - - conv2 = Conv( - 'conv2', - [1, 3, 3, 3], - Float32(), - {'X': dummy, 'W': w2}, - kernel_shape=[2, 2], - quantized=True - ) - conv2.quantizer = q2 - - s3 = Constant( - 'aq_const1', - Float32(), - np.array(1) - ) - - s4 = Constant( - 'aq_const2', - Float32(), - np.array(2) - ) - - aq2 = QTZ_linear_mid_tread_half( - 'aqtz2', - [1, 3, 3, 3], - Float32(), - {'X': conv2, 'Y': s3, 'Z': s4} - ) - - w3 = Constant( - 'weight3', - Float32(), - data3 - ) - - conv3 = Conv( - 'conv3', - [1, 2, 2, 3], - Float32(), - {'X': aq2, 'W': w3}, - kernel_shape=[2, 2] - ) + def test_pass_remove_identities(self) -> None: + """Test code for removing identities optimizer pass.""" + data = np.random.rand(1, 2, 2, 3) + graph1 = self.create_sample_graph(data) + graph2 = self.create_expected_graph(data) - # One output - y = Output( - 'output', - [1, 2, 2, 3], - Float32(), - {'input': conv3} - ) + pass_remove_identities(graph1) - # add ops to the graph - graph.add_op_and_inputs(y) + self.assertEqual(graph1, graph2, 'remove identities failed.') - return graph, scaling, scaling2 + print("Test remove identities #2 pass passed!") - def create_sample_graph2(self, data: np.ndarray) -> Graph: + @staticmethod + def create_sample_graph(data: np.ndarray) -> Graph: graph = Graph() # input - x = Input( - 'placeholder', - [3, 5, 5, 1], - Float32(), - dimension_format='CWHN' - ) + x = Input('placeholder', [1, 5, 5, 3], Float32()) # constant and internal nodes - w = Constant( - 'weight', - Float32(), - data, - dimension_format='CWHN' - ) - - i = Identity( - 'identity1', - [3, 2, 2, 1], - Float32(), - {'input': w}, - dimension_format='CWHN' - ) - - q = QTZ_binary_mean_scaling( - 'qtz1', - [3, 2, 2, 1], - Float32(), - {'input': i}, - dimension_format='CWHN' - ) + w = Constant('weight', Float32(), data) + i1 = Identity('identity1', [1, 2, 2, 3], Float32(), {'input': w}) + q = QTZ_binary_mean_scaling('qtz1', [1, 2, 2, 3], Float32(), {'input': i1}) # Conv - conv = Conv( - 'conv', - [3, 4, 4, 1], - Float32(), - {'X': x, 'W': q}, - kernel_shape=[2, 2], - dimension_format='CWHN' - ) - - rs = Reshape( - 'reshape', - [1, 48], - Float32(), - {'data': conv} - ) + conv = Conv('conv', [1, 4, 4, 3], Float32(), {'X': x, 'W': q}, kernel_shape=[2, 2]) + + i2 = Identity('identity2', [1, 4, 4, 3], Float32(), {'input': conv}) + + rs = Reshape('reshape', [1, 48], Float32(), {'data': i2}) # One output - y = Output( - 'output', - [1, 48], - Float32(), - {'input': rs}, - ) + y = Output('output', [1, 48], Float32(), {'input': rs}, ) # add ops to the graph graph.add_op_and_inputs(y) return graph - def create_transposed_graph(self, data: np.ndarray) -> Graph: + @staticmethod + def create_expected_graph(data: np.ndarray) -> Graph: graph = Graph() - data = data.transpose([3, 2, 1, 0]) # input - x = Input( - 'placeholder', - [1, 5, 5, 3], - Float32(), - dimension_format='NHWC' - ) + x = Input('placeholder', [1, 5, 5, 3], Float32()) # constant and internal nodes - w = Constant( - 'weight', - Float32(), - data, - dimension_format='NHWC' - ) - - i = Identity( - 'identity1', - [1, 2, 2, 3], - Float32(), - {'input': w}, - 
dimension_format='NHWC' - ) - - q = QTZ_binary_mean_scaling( - 'qtz1', - [1, 2, 2, 3], - Float32(), - {'input': i}, - dimension_format='NHWC' - ) + w = Constant('weight', Float32(), data) + q = QTZ_binary_mean_scaling('qtz1', [1, 2, 2, 3], Float32(), {'input': w}) # Conv - conv = Conv( - 'conv', - [1, 4, 4, 3], - Float32(), - {'X': x, 'W': q}, - kernel_shape=[2, 2], - dimension_format='NHWC' - ) - - rs = Reshape( - 'reshape', - [1, 48], - Float32(), - {'data': conv} - ) + conv = Conv('conv', [1, 4, 4, 3], Float32(), {'X': x, 'W': q}, kernel_shape=[2, 2]) + + rs = Reshape('reshape', [1, 48], Float32(), {'data': conv}) # One output - y = Output( - 'output', - [1, 48], - Float32(), - {'input': rs}, - ) + y = Output('output', [1, 48], Float32(), {'input': rs},) # add ops to the graph graph.add_op_and_inputs(y) return graph - def create_sample_graph3(self, data1: np.ndarray, data2: np.ndarray, data3: np.ndarray) -> Graph: + +class TestPassPropagateQuantizationDetailsIntoConv(unittest.TestCase): + """Test class for propagating quantization details into conv.""" + def test_pass_propagate_quantization_details_into_conv(self) -> None: + """Test pass.""" + data1 = np.random.rand(1, 2, 2, 3) + data2 = np.random.rand(1, 2, 2, 3) + graph1 = self.create_sample_graph(data1, data2) + graph2 = self.create_expected_graph(data1, data2) + + pass_propagate_quantization_details_into_conv(graph1) + aq_g1 = graph1.get_op('conv2').a_quantizer + aq_g2 = graph2.get_op('conv2').a_quantizer + kq_g1 = graph1.get_op('conv2').quantizer + kq_g2 = graph2.get_op('conv2').quantizer + + self.assertEqual(len(aq_g1), len(aq_g2), '[Failed] Found number of activation quantizer not matched') + if aq_g1 and aq_g2: + self.assertEqual(aq_g1[0].op_type, aq_g2[0].op_type, + '[Failed] Found type of activation quantizer not matched') + self.assertEqual(kq_g1.op_type, kq_g2.op_type, '[Failed] Found type of kernel quantizer not matched') + self.assertEqual(graph1, graph2, '[Failed] Expected graph not matched') + + print("Test propagate_quantization_details_into_conv #3 pass passed!") + + @staticmethod + def create_sample_graph(data1: np.ndarray, data2: np.ndarray) -> Graph: graph = Graph() # input - x = Input( - 'placeholder', - [1, 5, 5, 3], - Float32(), - ) + x = Input('placeholder', [1, 5, 5, 3], Float32()) - # constant and internal nodes - w = Constant( - 'weight', - Float32(), - data1 - ) - - q = QTZ_binary_mean_scaling( - 'qtz1', - [3, 2, 2, 3], - Float32(), - {'input': w} - ) + # Conv1 + w1 = Constant('weight1', Float32(), data1) + conv1 = Conv('conv1', [1, 4, 4, 3], Float32(), {'X': x, 'W': w1}, kernel_shape=[2, 2]) - # Conv - conv1 = Conv( - 'conv1', - [1, 4, 4, 3], - Float32(), - {'X': x, 'W': q}, - kernel_shape=[2, 2] - ) - - i2 = Identity( - 'identity2', - [1, 4, 4, 3], - Float32(), - {'input': conv1} - ) - - s1 = Constant( - 'aq_const1', - Float32(), - np.array(1) - ) - - s2 = Constant( - 'aq_const2', - Float32(), - np.array(2) - ) - - aq = QTZ_linear_mid_tread_half( - 'aqtz1', - [1, 4, 4, 3], - Float32(), - {'X': i2, 'Y': s1, 'Z': s2} - ) - - w2 = Constant( - 'weight2', - Float32(), - data2 - ) - - q2 = QTZ_binary_mean_scaling( - 'qtz2', - [3, 2, 2, 3], - Float32(), - {'input': w2} - ) - - conv2 = Conv( - 'conv2', - [1, 3, 3, 3], - Float32(), - {'X': aq, 'W': q2}, - kernel_shape=[2, 2] - ) - - w3 = Constant( - 'weight3', - Float32(), - data3 - ) - - q3 = QTZ_binary_mean_scaling( - 'qtz3', - [3, 2, 2, 3], - Float32(), - {'input': w3} - ) - - conv3 = Conv( - 'conv3', - [1, 3, 3, 3], - Float32(), - {'X': aq, 'W': q3}, - 
kernel_shape=[2, 2] - ) - - y1 = Output( - 'output1', - [1, 3, 3, 3], - Float32(), - {'input': conv2} - ) - - y2 = Output( - 'output2', - [1, 3, 3, 3], - Float32(), - {'input': conv3} - ) + # activation quantizer + s1 = Constant('aq_const1', Float32(), np.array(1)) + s2 = Constant('aq_const2', Float32(), np.array(2)) + aq = QTZ_linear_mid_tread_half('aqtz1', [1, 4, 4, 3], Float32(), {'X': conv1, 'Y': s1, 'Z': s2}) + + # Conv2 + w2 = Constant('weight2', Float32(), data2) + kq = QTZ_binary_mean_scaling('kqtz1', [1, 2, 2, 3], Float32(), {'input': w2}) + conv2 = Conv('conv2', [1, 3, 3, 3], Float32(), {'X': aq, 'W': kq}, kernel_shape=[2, 2]) + + # One output + y = Output('output', [1, 3, 3, 3], Float32(), {'input': conv2}) # add ops to the graph - graph.add_op_and_inputs(y1) - graph.add_op_and_inputs(y2) + graph.add_op_and_inputs(y) return graph - def create_quantized_graph2(self, data1: np.ndarray, data2: np.ndarray, data3: np.ndarray) -> Graph: + @staticmethod + def create_expected_graph(data1: np.ndarray, data2: np.ndarray) -> Graph: graph = Graph() # input - x = Input( - 'placeholder', - [1, 5, 5, 3], - Float32(), - ) + x = Input('placeholder', [1, 5, 5, 3], Float32()) - # constant and internal nodes - scaling1, qdata1 = self.binary_mean_scaling(data1) - w = Constant( - 'weight', - Float32(), - qdata1 * scaling1 - ) - - q = QTZ_binary_mean_scaling( - 'qtz1', - [3, 2, 2, 3], - Float32(), - {'input': w} - ) + # Conv1 + w1 = Constant('weight1', Float32(), data1) + conv1 = Conv('conv1', [1, 4, 4, 3], Float32(), {'X': x, 'W': w1}, kernel_shape=[2, 2]) - # Conv - conv1 = Conv( - 'conv1', - [1, 4, 4, 3], - Float32(), - {'X': x, 'W': w}, - kernel_shape=[2, 2] - ) - - s1 = Constant( - 'aq_const1', - Float32(), - np.array(1) - ) - - s2 = Constant( - 'aq_const2', - Float32(), - np.array(2) - ) - - aq = QTZ_linear_mid_tread_half( - 'aqtz1', - [1, 4, 4, 3], - QUANTIZED_NOT_PACKED(), - {'X': conv1, 'Y': s1, 'Z': s2} - ) - - from modules.packer import Packer - packer = Packer(1, 32) - scaling2, qdata2 = self.binary_mean_scaling(data2) - w2 = Constant( - 'weight2', - Uint32(), - packer.run(qdata2), - packed=True, - actual_shape=[3, 2, 2, 3] - ) - - q2 = QTZ_binary_mean_scaling( - 'qtz2', - [3, 2, 2, 3], - Float32(), - {'input': w2} - ) - q2.scaling_factor = scaling2 - - conv2 = Conv( - 'conv2', - [1, 3, 3, 3], - Float32(), - {'X': aq, 'W': w2}, - kernel_shape=[2, 2], - quantized=True, - ) - conv2.quantizer = q2 - - scaling3, qdata3 = self.binary_mean_scaling(data3) - w3 = Constant( - 'weight2', - Uint32(), - packer.run(qdata3), - packed=True, - actual_shape=[3, 2, 2, 3] - ) - - q3 = QTZ_binary_mean_scaling( - 'qtz3', - [3, 2, 2, 3], - Float32(), - {'input': w3} - ) - q3.scaling_factor = scaling3 - - conv3 = Conv( - 'conv3', - [1, 3, 3, 3], - Float32(), - {'X': aq, 'W': w3}, - kernel_shape=[2, 2], - quantized=True - ) - conv3.quantizer = q3 - - y1 = Output( - 'output1', - [1, 3, 3, 3], - Float32(), - {'input': conv2} - ) - - y2 = Output( - 'output2', - [1, 3, 3, 3], - Float32(), - {'input': conv3} - ) + # activation quantizer + s1 = Constant('aq_const1', Float32(), np.array(1)) + s2 = Constant('aq_const2', Float32(), np.array(2)) + aq = QTZ_linear_mid_tread_half('aqtz1', [1, 4, 4, 3], Float32(), {'X': conv1, 'Y': s1, 'Z': s2}) + + # Conv2 + w2 = Constant('weight2', Float32(), data2) + kq = QTZ_binary_mean_scaling('kqtz1', [1, 2, 2, 3], Float32(), {'input': w2}) + conv2 = Conv('conv2', [1, 3, 3, 3], Float32(), {'X': aq, 'W': kq}, kernel_shape=[2, 2]) + conv2.a_quantizer = [aq] + conv2.quantizer = kq + + # 
One output + y = Output('output', [1, 3, 3, 3], Float32(), {'input': conv2}) # add ops to the graph - graph.add_op_and_inputs(y1) - graph.add_op_and_inputs(y2) + graph.add_op_and_inputs(y) - return graph, scaling2, scaling3 + return graph if __name__ == '__main__': From a2c51a14fc6c00db70dd81b804b0b5facb23b2c0 Mon Sep 17 00:00:00 2001 From: nlpng Date: Thu, 17 Jan 2019 08:49:45 +0900 Subject: [PATCH 35/45] just add more tests for more optimizatino pass --- dlk/tests/test_optimizer.py | 174 +++++++++++++++++++++++++++++++++++- 1 file changed, 173 insertions(+), 1 deletion(-) diff --git a/dlk/tests/test_optimizer.py b/dlk/tests/test_optimizer.py index 0c0555cdf..9a47b70cc 100644 --- a/dlk/tests/test_optimizer.py +++ b/dlk/tests/test_optimizer.py @@ -21,7 +21,8 @@ pass_quantize_convolutions, pass_propagate_datatypes, pass_propagate_output_type_backward from core.graph import Graph from core.operators import Add, AveragePool, BatchNormalization, Constant, Conv, Identity, Input, \ - MaxPool, Operator, Output, Transpose, QTZ_binary_mean_scaling, QTZ_linear_mid_tread_half, Reshape, Softmax + MaxPool, Operator, Output, Transpose, QTZ_binary_mean_scaling, QTZ_linear_mid_tread_half, Reshape, Softmax, \ + SpaceToDepth import numpy as np from typing import Tuple @@ -247,5 +248,176 @@ def create_expected_graph(data1: np.ndarray, data2: np.ndarray) -> Graph: return graph +class TestPassPackWeights(unittest.TestCase): + """Test class for packing weight.""" + def test_pass_pack_weights(self) -> None: + """Test pass.""" + data1 = np.float32(np.random.rand(1, 2, 2, 3)) + data2 = np.float32(np.random.rand(1, 2, 2, 3)) + graph1 = self.create_sample_graph(data1, data2) + + pass_pack_weights(graph1) + + self.assertEqual(graph1.get_op('conv2').input_ops['W'].op_type, 'Constant', + '[Failed] Found input kernel weights not a constant') + + print("Test pack_weights #4 pass passed!") + + @staticmethod + def create_sample_graph(data1: np.ndarray, data2: np.ndarray) -> Graph: + graph = Graph() + + # input + x = Input('placeholder', [1, 5, 5, 3], Float32()) + + # Conv1 + w1 = Constant('weight1', Float32(), data1) + conv1 = Conv('conv1', [1, 4, 4, 3], Float32(), {'X': x, 'W': w1}, kernel_shape=[2, 2]) + + # activation quantizer + s1 = Constant('aq_const1', Float32(), np.array(1)) + s2 = Constant('aq_const2', Float32(), np.array(2)) + aq = QTZ_linear_mid_tread_half('aqtz1', [1, 4, 4, 3], Float32(), {'X': conv1, 'Y': s1, 'Z': s2}) + + # Conv2 + w2 = Constant('weight2', Float32(), data2) + kq = QTZ_binary_mean_scaling('kqtz1', [1, 2, 2, 3], Float32(), {'input': w2}) + conv2 = Conv('conv2', [1, 3, 3, 3], Float32(), {'X': aq, 'W': kq}, kernel_shape=[2, 2]) + conv2.a_quantizer = [aq] + conv2.quantizer = kq + + # One output + y = Output('output', [1, 3, 3, 3], Float32(), {'input': conv2}) + + # add ops to the graph + graph.add_op_and_inputs(y) + + return graph + + +class TestPassQuantizeConvolutions(unittest.TestCase): + """Test class for packing weight.""" + def test_pass_quantize_convolutions(self) -> None: + """Test pass.""" + data1 = np.float32(np.random.rand(1, 2, 2, 3)) + data2 = np.float32(np.random.rand(1, 2, 2, 3)) + graph1 = self.create_sample_graph(data1, data2) + + pass_quantize_convolutions(graph1) + + self.assertEqual(graph1.get_op('aqtz1').dtype, QUANTIZED_NOT_PACKED(), + '[Failed] Found output dtype of activation quantizer not proper') + self.assertEqual(graph1.get_op('conv2').dtype, Float32(), + '[Failed] Found output dtype of conv not proper') + + print("Test quantize_convolutions #5 pass passed!") + + 
@staticmethod + def create_sample_graph(data1: np.ndarray, data2: np.ndarray) -> Graph: + graph = Graph() + + # input + x = Input('placeholder', [1, 5, 5, 3], Float32()) + + # Conv1 + w1 = Constant('weight1', Float32(), data1) + conv1 = Conv('conv1', [1, 4, 4, 3], Float32(), {'X': x, 'W': w1}, kernel_shape=[2, 2]) + + # activation quantizer + s1 = Constant('aq_const1', Float32(), np.array(1)) + s2 = Constant('aq_const2', Float32(), np.array(2)) + aq = QTZ_linear_mid_tread_half('aqtz1', [1, 4, 4, 3], Float32(), {'X': conv1, 'Y': s1, 'Z': s2}) + + # Conv2 + w2 = Constant('weight2', Float32(), data2) + kq = QTZ_binary_mean_scaling('kqtz1', [1, 2, 2, 3], Float32(), {'input': w2}) + conv2 = Conv('conv2', [1, 3, 3, 3], Float32(), {'X': aq, 'W': kq}, kernel_shape=[2, 2]) + conv2.a_quantizer = [aq] + conv2.quantizer = kq + + # One output + y = Output('output', [1, 3, 3, 3], Float32(), {'input': conv2}) + + # add ops to the graph + graph.add_op_and_inputs(y) + + return graph + + +class TestPassPropagateDatatypes(unittest.TestCase): + """Test class for packing weight.""" + def test_pass_propagate_datatypes(self) -> None: + """Test pass.""" + data1 = np.float32(np.random.rand(1, 2, 2, 3)) + graph1 = self.create_sample_graph(data1) + # graph2 = self.create_expected_graph(data1, data2) + + pass_propagate_datatypes(graph1) + + self.assertEqual(graph1.get_op('s2d').dtype, QUANTIZED_NOT_PACKED(), + '[Failed] Found dtype of SpaceToDepth not propagate correctly') + + print("Test propagate datatypes #6 pass passed!") + + @staticmethod + def create_sample_graph(data1: np.ndarray) -> Graph: + graph = Graph() + + # input + x = Input('placeholder', [1, 5, 5, 3], Float32()) + + # Conv1 + w1 = Constant('weight1', Float32(), data1) + conv1 = Conv('conv1', [1, 4, 4, 3], QUANTIZED_NOT_PACKED(), {'X': x, 'W': w1}, kernel_shape=[2, 2]) + + pool1 = SpaceToDepth('s2d', [1, 2, 2, 12], Float32(), {'input': conv1}) + + # One output + y = Output('output', [1, 2, 2, 12], Float32(), {'input': pool1}) + + # add ops to the graph + graph.add_op_and_inputs(y) + + return graph + + +class TestPassPropagateOutputTypeBackward(unittest.TestCase): + """Test class for packing weight.""" + def test_pass_propagate_output_type_backward(self) -> None: + """Test pass.""" + data1 = np.float32(np.random.rand(1, 2, 2, 3)) + graph1 = self.create_sample_graph(data1) + # graph2 = self.create_expected_graph(data1, data2) + + pass_propagate_output_type_backward(graph1) + + self.assertEqual(graph1.get_op('conv1').dtype, Float32(), + '[Failed] Found dtype of SpaceToDepth not propagate correctly') + + print("Test propagate output type backward #7 pass passed!") + + @staticmethod + def create_sample_graph(data1: np.ndarray) -> Graph: + graph = Graph() + + # input + x = Input('placeholder', [1, 5, 5, 3], Float32()) + + # Conv1 + w1 = Constant('weight1', Float32(), data1) + conv1 = Conv('conv1', [1, 4, 4, 3], QUANTIZED_NOT_PACKED(), {'X': x, 'W': w1}, kernel_shape=[2, 2]) + conv1.is_quantized = True + + pool1 = SpaceToDepth('s2d', [1, 2, 2, 12], Float32(), {'input': conv1}) + + # One output + y = Output('output', [1, 2, 2, 12], Float32(), {'input': pool1}) + + # add ops to the graph + graph.add_op_and_inputs(y) + + return graph + + if __name__ == '__main__': unittest.main() From aa5bb3d1351cfd7397d3507654f4f94cf268436e Mon Sep 17 00:00:00 2001 From: nlpng Date: Thu, 17 Jan 2019 10:59:12 +0900 Subject: [PATCH 36/45] Add more more tests for optimization pass --- dlk/tests/test_optimizer.py | 64 +++++++++++++++++++++++++++++++++++++ 1 file changed, 64 
insertions(+) diff --git a/dlk/tests/test_optimizer.py b/dlk/tests/test_optimizer.py index 9a47b70cc..2f912691a 100644 --- a/dlk/tests/test_optimizer.py +++ b/dlk/tests/test_optimizer.py @@ -419,5 +419,69 @@ def create_sample_graph(data1: np.ndarray) -> Graph: return graph +class TestPassComputeThresholds(unittest.TestCase): + """Test class for packing weight.""" + def test_pass_compute_thresholds(self) -> None: + """Test pass.""" + data1 = np.float32(np.random.rand(1, 2, 2, 3)) + data2 = np.float32(np.random.rand(1, 2, 2, 3)) + graph1 = self.create_sample_graph(data1, data2) + # graph2 = self.create_expected_graph(data1, data2) + + pass_compute_thresholds(graph1) + + self.assertEqual(graph1.get_op('conv2').has_thresholds, True, + '[Failed] Found threshold of Conv not calculated') + + print("Test compute_thresholds #8 pass passed!") + + @staticmethod + def create_sample_graph(data1: np.ndarray, data2: np.ndarray) -> Graph: + graph = Graph() + + # input + x = Input('placeholder', [1, 5, 5, 3], Float32()) + + # Conv1 + w1 = Constant('weight1', Float32(), data1) + conv1 = Conv('conv1', [1, 4, 4, 3], Float32(), {'X': x, 'W': w1}, kernel_shape=[2, 2]) + + # activation quantizer + s1 = Constant('aq_const1', Int32(), np.array([2], dtype=np.int32)) + s2 = Constant('aq_const2', Float32(), np.array([2.0], dtype=np.float32)) + aq1 = QTZ_linear_mid_tread_half('aqtz1', [1, 4, 4, 3], Float32(), {'X': conv1, 'Y': s1, 'Z': s2}) + + # Conv2 + w2 = Constant('weight2', Float32(), data2) + kq = QTZ_binary_mean_scaling('kqtz1', [1, 2, 2, 3], Float32(), {'input': w2}) + conv2 = Conv('conv2', [1, 3, 3, 3], Float32(), {'X': aq1, 'W': kq}, kernel_shape=[2, 2]) + conv2.a_quantizer = [aq1] + conv2.quantizer = kq + conv2.is_quantized = True + + sc = Constant('bn_scale', Float32(), np.random.rand(3)) + be = Constant('bn_b', Float32(), np.random.rand(3)) + mu = Constant('bn_mu', Float32(), np.random.rand(3)) + va = Constant('bn_var', Float32(), np.random.rand(3)) + bn = BatchNormalization('bn', [1, 3, 3, 3], Float32(), {'X': conv2, + 'scale': sc, + 'B': be, + 'mean': mu, + 'var': va}) + + # activation quantizer + s3 = Constant('aq_const3', Int32(), np.array([2], dtype=np.int32)) + s4 = Constant('aq_const4', Float32(), np.array([2.0], dtype=np.float32)) + aq2 = QTZ_linear_mid_tread_half('aqtz2', [1, 3, 3, 3], Float32(), {'X': bn, 'Y': s3, 'Z': s4}) + + # One output + y = Output('output', [1, 3, 3, 3], Float32(), {'input': aq2}) + + # add ops to the graph + graph.add_op_and_inputs(y) + + return graph + + if __name__ == '__main__': unittest.main() From d3b496e99b9510764a7f1db1e0d70cfcc2b06278 Mon Sep 17 00:00:00 2001 From: nlpng Date: Thu, 17 Jan 2019 13:23:28 +0900 Subject: [PATCH 37/45] Add last test for constant folding pass --- dlk/tests/test_optimizer.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/dlk/tests/test_optimizer.py b/dlk/tests/test_optimizer.py index 2f912691a..819760bd7 100644 --- a/dlk/tests/test_optimizer.py +++ b/dlk/tests/test_optimizer.py @@ -483,5 +483,38 @@ def create_sample_graph(data1: np.ndarray, data2: np.ndarray) -> Graph: return graph +class TestPassConstantFolding(unittest.TestCase): + """Test class for packing weight.""" + def test_pass_constant_folding(self) -> None: + """Test pass.""" + graph1 = self.create_sample_graph() + + pass_constant_folding(graph1) + + self.assertEqual(set(graph1.get_op('potatoes_new').data), set(np.array([2, 5])), + '[Failed] Found folded constant not correct') + + print("Test constant folding #9 pass passed!") + + 
@staticmethod + def create_sample_graph() -> Graph: + graph = Graph() + + x = Input('placeholder', [2], Float32()) + + s1 = Constant('potato_1', Float32(), np.array([1, 2])) + s2 = Constant('potato_2', Float32(), np.array([1, 3])) + add1 = Add('potatoes', [2], Float32(), {'A': s1, 'B': s2}) + add2 = Add('more_potatoes', [2], Float32(), {'A': x, 'B': add1}) + + # One output + y = Output('output', [2], Float32(), {'input': add2}) + + # add ops to the graph + graph.add_op_and_inputs(y) + + return graph + + if __name__ == '__main__': unittest.main() From ebc467931aa9286368bccedc567782810e36020c Mon Sep 17 00:00:00 2001 From: nlpng Date: Thu, 17 Jan 2019 14:23:51 +0900 Subject: [PATCH 38/45] Add functions comments for run and de_run --- dlk/python/dlk/core/operators.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/dlk/python/dlk/core/operators.py b/dlk/python/dlk/core/operators.py index e54090d4f..ff9460ea9 100644 --- a/dlk/python/dlk/core/operators.py +++ b/dlk/python/dlk/core/operators.py @@ -1289,6 +1289,11 @@ def _check_consistency(self) -> None: self._assert(x_shape == self.shape, message) def run(self, **kwargs) -> Dict: + """Return the forward calculation results of batch normalization. + + Currently this function is only used by threshold skipping optimization pass + for recursively calculating thresholds of the skipping patterns. + """ scale = np.float64(self._input_ops['scale'].data) beta = np.float64(self._input_ops['B'].data) mean = np.float64(self._input_ops['mean'].data) @@ -1299,6 +1304,11 @@ def run(self, **kwargs) -> Dict: return kwargs def de_run(self, **kwargs) -> Dict: + """Return the reversed calculation results of batch normalization. + + Currently this function is only used by threshold skipping optimization pass + for recursively calculating thresholds of the skipping patterns. + """ scale = np.float64(self._input_ops['scale'].data) beta = np.float64(self._input_ops['B'].data) mean = np.float64(self._input_ops['mean'].data) @@ -1370,6 +1380,11 @@ def _check_consistency(self) -> None: self._assert(x_shape == self.shape, message) def run(self, **kwargs) -> Dict: + """Return the result of forward calculation of an activation quantizer. + + Currently this function is only used by threshold skipping optimization pass + for recursively calculating thresholds of the skipping patterns. + """ bit = self._input_ops['Y'].data max_value = np.float64(self._input_ops['Z'].data) in_data = np.float64(kwargs['data']) @@ -1380,6 +1395,11 @@ def run(self, **kwargs) -> Dict: return kwargs def de_run(self, **kwargs) -> Dict: + """Return the result of reversed calculation of an activation quantizer. + + Currently this function is only used by threshold skipping optimization pass + for recursively calculating thresholds of the skipping patterns. + """ bit = self._input_ops['Y'].data max_value = np.float64(self._input_ops['Z'].data) in_data = np.float64(kwargs['data']) @@ -1389,6 +1409,10 @@ def de_run(self, **kwargs) -> Dict: return kwargs def run_forward(self) -> np.ndarray: + """General function for this quantization operator. + + This function returns numpy array. 
+ """ data_dict = self.run(data=self._input_ops['X'].data) self._data = data_dict['data'] return self._data From 9b5c66b1a001623c18ad1e6493ad5d272dc6d7ca Mon Sep 17 00:00:00 2001 From: nlpng Date: Thu, 17 Jan 2019 14:32:48 +0900 Subject: [PATCH 39/45] Remove redundant codes and comments --- dlk/tests/test_optimizer.py | 41 +++++++++++++++---------------------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/dlk/tests/test_optimizer.py b/dlk/tests/test_optimizer.py index 819760bd7..3c250ccfe 100644 --- a/dlk/tests/test_optimizer.py +++ b/dlk/tests/test_optimizer.py @@ -25,23 +25,21 @@ SpaceToDepth import numpy as np -from typing import Tuple class TestPassTranspose(unittest.TestCase): """Test class for transposing pass.""" - def test_pass_transpose(self) -> None: """Test code for transposing optimizer pass.""" - data1 = np.random.rand(3, 2, 2, 1) - graph1 = self.create_sample_graph(data1) - graph2 = self.create_expected_graph(data1) + data = np.random.rand(3, 2, 2, 1) + graph1 = self.create_sample_graph(data) + graph2 = self.create_expected_graph(data) pass_transpose(graph1) self.assertEqual(graph1, graph2, 'transpose to NHWC failed.') - print("Test transpose #1 pass passed!") + print("Test pass #1 transpose passed!") @staticmethod def create_sample_graph(data: np.ndarray) -> Graph: @@ -58,9 +56,8 @@ def create_sample_graph(data: np.ndarray) -> Graph: # Conv conv = Conv('conv', [3, 4, 4, 1], Float32(), {'X': x, 'W': q}, kernel_shape=[2, 2], dimension_format='CWHN') - rs = Reshape('reshape', [1, 48], Float32(), {'data': conv}) - # One output + rs = Reshape('reshape', [1, 48], Float32(), {'data': conv}) y = Output('output', [1, 48], Float32(), {'input': rs},) # add ops to the graph @@ -98,7 +95,6 @@ def create_expected_graph(data: np.ndarray) -> Graph: class TestPassRemoveIdentities(unittest.TestCase): """Test class for removing identity pass.""" - def test_pass_remove_identities(self) -> None: """Test code for removing identities optimizer pass.""" data = np.random.rand(1, 2, 2, 3) @@ -109,7 +105,7 @@ def test_pass_remove_identities(self) -> None: self.assertEqual(graph1, graph2, 'remove identities failed.') - print("Test remove identities #2 pass passed!") + print("Test pass #2 remove identities passed!") @staticmethod def create_sample_graph(data: np.ndarray) -> Graph: @@ -126,12 +122,10 @@ def create_sample_graph(data: np.ndarray) -> Graph: # Conv conv = Conv('conv', [1, 4, 4, 3], Float32(), {'X': x, 'W': q}, kernel_shape=[2, 2]) + # One output i2 = Identity('identity2', [1, 4, 4, 3], Float32(), {'input': conv}) - rs = Reshape('reshape', [1, 48], Float32(), {'data': i2}) - - # One output - y = Output('output', [1, 48], Float32(), {'input': rs}, ) + y = Output('output', [1, 48], Float32(), {'input': rs},) # add ops to the graph graph.add_op_and_inputs(y) @@ -152,9 +146,8 @@ def create_expected_graph(data: np.ndarray) -> Graph: # Conv conv = Conv('conv', [1, 4, 4, 3], Float32(), {'X': x, 'W': q}, kernel_shape=[2, 2]) - rs = Reshape('reshape', [1, 48], Float32(), {'data': conv}) - # One output + rs = Reshape('reshape', [1, 48], Float32(), {'data': conv}) y = Output('output', [1, 48], Float32(), {'input': rs},) # add ops to the graph @@ -185,7 +178,7 @@ def test_pass_propagate_quantization_details_into_conv(self) -> None: self.assertEqual(kq_g1.op_type, kq_g2.op_type, '[Failed] Found type of kernel quantizer not matched') self.assertEqual(graph1, graph2, '[Failed] Expected graph not matched') - print("Test propagate_quantization_details_into_conv #3 pass passed!") + print("Test 
pass #3 propagate_quantization_details_into_conv passed!") @staticmethod def create_sample_graph(data1: np.ndarray, data2: np.ndarray) -> Graph: @@ -261,7 +254,7 @@ def test_pass_pack_weights(self) -> None: self.assertEqual(graph1.get_op('conv2').input_ops['W'].op_type, 'Constant', '[Failed] Found input kernel weights not a constant') - print("Test pack_weights #4 pass passed!") + print("Test pass #4 pack_weights passed!") @staticmethod def create_sample_graph(data1: np.ndarray, data2: np.ndarray) -> Graph: @@ -310,7 +303,7 @@ def test_pass_quantize_convolutions(self) -> None: self.assertEqual(graph1.get_op('conv2').dtype, Float32(), '[Failed] Found output dtype of conv not proper') - print("Test quantize_convolutions #5 pass passed!") + print("Test pass #5 quantize_convolutions passed!") @staticmethod def create_sample_graph(data1: np.ndarray, data2: np.ndarray) -> Graph: @@ -357,7 +350,7 @@ def test_pass_propagate_datatypes(self) -> None: self.assertEqual(graph1.get_op('s2d').dtype, QUANTIZED_NOT_PACKED(), '[Failed] Found dtype of SpaceToDepth not propagate correctly') - print("Test propagate datatypes #6 pass passed!") + print("Test pass #6 propagate data types passed!") @staticmethod def create_sample_graph(data1: np.ndarray) -> Graph: @@ -387,14 +380,13 @@ def test_pass_propagate_output_type_backward(self) -> None: """Test pass.""" data1 = np.float32(np.random.rand(1, 2, 2, 3)) graph1 = self.create_sample_graph(data1) - # graph2 = self.create_expected_graph(data1, data2) pass_propagate_output_type_backward(graph1) self.assertEqual(graph1.get_op('conv1').dtype, Float32(), '[Failed] Found dtype of SpaceToDepth not propagate correctly') - print("Test propagate output type backward #7 pass passed!") + print("Test pass #7 propagate output type backward passed!") @staticmethod def create_sample_graph(data1: np.ndarray) -> Graph: @@ -426,14 +418,13 @@ def test_pass_compute_thresholds(self) -> None: data1 = np.float32(np.random.rand(1, 2, 2, 3)) data2 = np.float32(np.random.rand(1, 2, 2, 3)) graph1 = self.create_sample_graph(data1, data2) - # graph2 = self.create_expected_graph(data1, data2) pass_compute_thresholds(graph1) self.assertEqual(graph1.get_op('conv2').has_thresholds, True, '[Failed] Found threshold of Conv not calculated') - print("Test compute_thresholds #8 pass passed!") + print("Test pass #8 compute_thresholds passed!") @staticmethod def create_sample_graph(data1: np.ndarray, data2: np.ndarray) -> Graph: @@ -494,7 +485,7 @@ def test_pass_constant_folding(self) -> None: self.assertEqual(set(graph1.get_op('potatoes_new').data), set(np.array([2, 5])), '[Failed] Found folded constant not correct') - print("Test constant folding #9 pass passed!") + print("Test pass #9 constant folding passed!") @staticmethod def create_sample_graph() -> Graph: From 4b006f9ced893729fef34782f8eed6f2d3235799 Mon Sep 17 00:00:00 2001 From: nlpng Date: Fri, 18 Jan 2019 09:18:09 +0900 Subject: [PATCH 40/45] just move things around --- dlk/tests/test_optimizer.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/dlk/tests/test_optimizer.py b/dlk/tests/test_optimizer.py index 3c250ccfe..ad9e02f8e 100644 --- a/dlk/tests/test_optimizer.py +++ b/dlk/tests/test_optimizer.py @@ -76,15 +76,14 @@ def create_expected_graph(data: np.ndarray) -> Graph: # constant and internal nodes w = Constant('weight', Float32(), data, dimension_format='NHWC') - i = Identity('identity1', [1, 2, 2, 3], Float32(), {'input': w}, dimension_format='NHWC') - q = QTZ_binary_mean_scaling('qtz1', [1, 2, 2, 3], 
Float32(), {'input': i}, dimension_format='NHWC') + i1 = Identity('identity1', [1, 2, 2, 3], Float32(), {'input': w}, dimension_format='NHWC') + q = QTZ_binary_mean_scaling('qtz1', [1, 2, 2, 3], Float32(), {'input': i1}, dimension_format='NHWC') # Conv conv = Conv('conv', [1, 4, 4, 3], Float32(), {'X': x, 'W': q}, kernel_shape=[2, 2], dimension_format='NHWC') - rs = Reshape('reshape', [1, 48], Float32(), {'data': conv}) - # One output + rs = Reshape('reshape', [1, 48], Float32(), {'data': conv}) y = Output('output', [1, 48], Float32(), {'input': rs},) # add ops to the graph From ca65f4ec73ad368f8aa65d11c1911e82fa593930 Mon Sep 17 00:00:00 2001 From: nlpng Date: Tue, 29 Jan 2019 12:15:50 +0900 Subject: [PATCH 41/45] add new assertion check for quantize convolution pass --- dlk/python/dlk/core/optimizer.py | 2 +- dlk/tests/test_optimizer.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index 7b4484783..e9365a507 100644 --- a/dlk/python/dlk/core/optimizer.py +++ b/dlk/python/dlk/core/optimizer.py @@ -414,7 +414,7 @@ def pass_quantize_convolutions(graph: Graph) -> None: conv_node.dtype = QUANTIZED_NOT_PACKED() # change the output data type of the quantizers - conv_node.quantizer.dtype = Uint32 + conv_node.quantizer.dtype = Uint32() for qtz in conv_node.a_quantizer: qtz.dtype = QUANTIZED_NOT_PACKED() diff --git a/dlk/tests/test_optimizer.py b/dlk/tests/test_optimizer.py index ad9e02f8e..28a1b2f82 100644 --- a/dlk/tests/test_optimizer.py +++ b/dlk/tests/test_optimizer.py @@ -299,6 +299,8 @@ def test_pass_quantize_convolutions(self) -> None: self.assertEqual(graph1.get_op('aqtz1').dtype, QUANTIZED_NOT_PACKED(), '[Failed] Found output dtype of activation quantizer not proper') + self.assertEqual(graph1.get_op('kqtz1').dtype, Uint32(), + '[Failed] Found output dtype of kernel quantizer not proper') self.assertEqual(graph1.get_op('conv2').dtype, Float32(), '[Failed] Found output dtype of conv not proper') From 39b86783e62104ae7ecea895145dadbacd8b57a5 Mon Sep 17 00:00:00 2001 From: nlpng Date: Tue, 29 Jan 2019 12:32:48 +0900 Subject: [PATCH 42/45] add test for graph has no kernel quantizer --- dlk/tests/test_optimizer.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/dlk/tests/test_optimizer.py b/dlk/tests/test_optimizer.py index 28a1b2f82..ed625e072 100644 --- a/dlk/tests/test_optimizer.py +++ b/dlk/tests/test_optimizer.py @@ -247,12 +247,16 @@ def test_pass_pack_weights(self) -> None: data1 = np.float32(np.random.rand(1, 2, 2, 3)) data2 = np.float32(np.random.rand(1, 2, 2, 3)) graph1 = self.create_sample_graph(data1, data2) - pass_pack_weights(graph1) - self.assertEqual(graph1.get_op('conv2').input_ops['W'].op_type, 'Constant', '[Failed] Found input kernel weights not a constant') + graph_2_1 = self.create_sample_graph_2(data1) + graph_2_2 = self.create_sample_graph_2(data1) + pass_pack_weights(graph_2_2) + self.assertEqual(graph_2_1, graph_2_2, + '[Failed] Found optimized graph not the same') + print("Test pass #4 pack_weights passed!") @staticmethod @@ -286,6 +290,24 @@ def create_sample_graph(data1: np.ndarray, data2: np.ndarray) -> Graph: return graph + @staticmethod + def create_sample_graph_2(data1: np.ndarray) -> Graph: + graph = Graph() + + # input + x = Input('placeholder', [1, 5, 5, 3], Float32()) + + # Conv1 + w1 = Constant('weight1', Float32(), data1) + conv1 = Conv('conv1', [1, 4, 4, 3], Float32(), {'X': x, 'W': w1}, kernel_shape=[2, 2]) + + y = 
Output('output', [1, 4, 4, 3], Float32(), {'input': conv1}) + + # add ops to the graph + graph.add_op_and_inputs(y) + + return graph + class TestPassQuantizeConvolutions(unittest.TestCase): """Test class for packing weight.""" From b5e67a810763d41f4e7324a2f7f98b4aa3820568 Mon Sep 17 00:00:00 2001 From: nlpng Date: Tue, 29 Jan 2019 12:44:41 +0900 Subject: [PATCH 43/45] add test for graph has no weights --- dlk/tests/test_optimizer.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dlk/tests/test_optimizer.py b/dlk/tests/test_optimizer.py index ed625e072..6647abad9 100644 --- a/dlk/tests/test_optimizer.py +++ b/dlk/tests/test_optimizer.py @@ -246,6 +246,7 @@ def test_pass_pack_weights(self) -> None: """Test pass.""" data1 = np.float32(np.random.rand(1, 2, 2, 3)) data2 = np.float32(np.random.rand(1, 2, 2, 3)) + graph1 = self.create_sample_graph(data1, data2) pass_pack_weights(graph1) self.assertEqual(graph1.get_op('conv2').input_ops['W'].op_type, 'Constant', @@ -301,7 +302,10 @@ def create_sample_graph_2(data1: np.ndarray) -> Graph: w1 = Constant('weight1', Float32(), data1) conv1 = Conv('conv1', [1, 4, 4, 3], Float32(), {'X': x, 'W': w1}, kernel_shape=[2, 2]) - y = Output('output', [1, 4, 4, 3], Float32(), {'input': conv1}) + s1 = Constant('const1', Float32(), np.zeros([1, 4, 4, 3])) + add1 = Add('add', [1, 4, 4, 3], Float32(), {'A': conv1, 'B': s1}) + + y = Output('output', [1, 4, 4, 3], Float32(), {'input': add1}) # add ops to the graph graph.add_op_and_inputs(y) From 82a72c74d956ba08bb97087c70a215c602f10157 Mon Sep 17 00:00:00 2001 From: nlpng Date: Tue, 29 Jan 2019 12:50:06 +0900 Subject: [PATCH 44/45] fix the copyright year --- dlk/python/dlk/core/graph_pattern_matching.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dlk/python/dlk/core/graph_pattern_matching.py b/dlk/python/dlk/core/graph_pattern_matching.py index 6eb328746..79a54757c 100644 --- a/dlk/python/dlk/core/graph_pattern_matching.py +++ b/dlk/python/dlk/core/graph_pattern_matching.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright 2018 The Blueoil Authors. All Rights Reserved. +# Copyright 2019 The Blueoil Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From d216594933599112eebde063bafff48f916a8925 Mon Sep 17 00:00:00 2001 From: nlpng Date: Tue, 29 Jan 2019 13:26:24 +0900 Subject: [PATCH 45/45] take care the negative values --- dlk/python/dlk/core/optimizer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dlk/python/dlk/core/optimizer.py b/dlk/python/dlk/core/optimizer.py index e9365a507..433e7ddb9 100644 --- a/dlk/python/dlk/core/optimizer.py +++ b/dlk/python/dlk/core/optimizer.py @@ -293,7 +293,8 @@ def pass_compute_thresholds(graph: Graph) -> None: else int(math.ceil(th_per_ch)) # take care of threshold values that are larger than 13-bit signed integer - threshold_table[abs(threshold_table) > max_th_value] = max_th_value + threshold_table[threshold_table > max_th_value] = max_th_value + threshold_table[threshold_table < -max_th_value] = -max_th_value for c in range(ch): threshold_table[c, -1] = 1 \
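
The final patch above replaces the single abs()-based clamp in pass_compute_thresholds with a two-sided clip, so large negative thresholds keep their sign instead of being overwritten with the positive limit. A minimal numpy sketch of the difference, assuming max_th_value is the 13-bit signed maximum (4095) and using made-up threshold values (the real table is produced earlier in the pass):

    import numpy as np

    max_th_value = 2 ** 12 - 1                        # assumed 13-bit signed maximum
    threshold_table = np.array([10, 5000, -5000])     # illustrative values only

    # Old line: abs() selects both signs, so -5000 is overwritten with +4095.
    old = threshold_table.copy()
    old[abs(old) > max_th_value] = max_th_value       # -> [10, 4095, 4095]

    # Patched lines: each side is clipped to its own bound, keeping the sign.
    new = threshold_table.copy()
    new[new > max_th_value] = max_th_value
    new[new < -max_th_value] = -max_th_value          # -> [10, 4095, -4095]

np.clip(threshold_table, -max_th_value, max_th_value) would give the same result; the explicit masks simply mirror the assignment style already used in the pass.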