From 674c241a0132283b2689686de98cbdb53624e262 Mon Sep 17 00:00:00 2001
From: "Steven S. Lyubomirsky"
Date: Mon, 7 Dec 2020 17:10:32 -0800
Subject: [PATCH] Decouple data_layout from etab

Instead of reading the layout from etab.data_layout, pass data_layout to
every Keras converter as an explicit argument. keras_op_to_relay threads
the layout given to from_keras through to the converters, so the
expression table no longer carries layout state.

---
 python/tvm/relay/frontend/keras.py | 126 ++++++++++++-----------------
 1 file changed, 51 insertions(+), 75 deletions(-)

diff --git a/python/tvm/relay/frontend/keras.py b/python/tvm/relay/frontend/keras.py
index d468ca78db7ba..8e968bf061462 100644
--- a/python/tvm/relay/frontend/keras.py
+++ b/python/tvm/relay/frontend/keras.py
@@ -65,10 +65,8 @@ def _convert_recurrent_activation(inexpr, keras_layer):
 
 
 def _convert_activation(
-    inexpr, keras_layer, etab, input_shape=None, data_layout=None
+    inexpr, keras_layer, etab, data_layout, input_shape=None
 ):  # pylint: disable=unused-argument
-    if data_layout is None:
-        data_layout = etab.data_layout
     if isinstance(keras_layer, str):
         act_type = keras_layer
     else:
@@ -127,12 +125,10 @@ def _convert_activation(
     )
 
 
-def _convert_advanced_activation(inexpr, keras_layer, etab, input_shape=None, data_layout=None):
+def _convert_advanced_activation(inexpr, keras_layer, etab, data_layout, input_shape=None):
     act_type = type(keras_layer).__name__
     if input_shape is None:
         input_shape = keras_layer.input_shape
-    if data_layout is None:
-        data_layout = etab.data_layout
 
     if act_type == "Softmax":
         axis = keras_layer.axis
@@ -241,7 +237,7 @@ def _convert_permute(
 
 
 def _convert_embedding(
-    inexpr, keras_layer, etab, input_shape=None, data_layout=None
+    inexpr, keras_layer, etab, data_layout, input_shape=None
 ):  # pylint: disable=unused-argument
     indices = inexpr
     weightList = keras_layer.get_weights()
@@ -252,7 +248,7 @@ def _convert_embedding(
 
 
 def _convert_dense(
-    inexpr, keras_layer, etab, input_shape=None, data_layout=None
+    inexpr, keras_layer, etab, data_layout, input_shape=None
 ):  # pylint: disable=unused-argument
     weightList = keras_layer.get_weights()
     weight = etab.new_const(weightList[0].transpose([1, 0]))
@@ -278,13 +274,13 @@ def _convert_dense(
     else:
         act_type = keras_layer.activation.__name__
     if act_type != "linear":
-        out = _convert_activation(out, act_type, etab)
+        out = _convert_activation(out, act_type, etab, data_layout)
     if input_dim > 2:
         out = _op.expand_dims(out, axis=0)
     return out
 
 
-def _convert_convolution(inexpr, keras_layer, etab, input_shape=None, data_layout=None):
+def _convert_convolution(inexpr, keras_layer, etab, data_layout, input_shape=None):
     _check_data_format(keras_layer)
     is_deconv = type(keras_layer).__name__ == "Conv2DTranspose"
     is_depthconv = type(keras_layer).__name__ == "DepthwiseConv2D"
@@ -292,8 +288,6 @@ def _convert_convolution(inexpr, keras_layer, etab, input_shape=None, data_layou
     weight = weightList[0]
     if input_shape is None:
         input_shape = keras_layer.input_shape
-    if data_layout is None:
-        data_layout = etab.data_layout
 
     if data_layout == "NHWC":
         if is_depthconv:
@@ -366,19 +360,17 @@ def _convert_convolution(inexpr, keras_layer, etab, input_shape=None, data_layou
     else:
         act_type = keras_layer.activation.__name__
     if act_type != "linear":
-        out = _convert_activation(out, act_type, etab)
+        out = _convert_activation(out, act_type, etab, data_layout)
     return out
 
 
-def _convert_convolution3d(inexpr, keras_layer, etab, input_shape=None, data_layout=None):
+def _convert_convolution3d(inexpr, keras_layer, etab, data_layout, input_shape=None):
     _check_data_format(keras_layer)
     weightList = keras_layer.get_weights()
     weight = weightList[0]
 
     if input_shape is None:
         input_shape = keras_layer.input_shape
-    if data_layout is None:
-        data_layout = etab.data_layout
 
     if data_layout == "NDHWC":
         kernel_layout = "DHWIO"
@@ -455,10 +447,8 @@ def _convert_convolution3d(inexpr, keras_layer, etab, input_shape=None, data_lay
     return out
 
 
-def _convert_separable_convolution(inexpr, keras_layer, etab, input_shape=None, data_layout=None):
+def _convert_separable_convolution(inexpr, keras_layer, etab, data_layout, input_shape=None):
     _check_data_format(keras_layer)
-    if data_layout is None:
-        data_layout = etab.data_layout
 
     if data_layout == "NHWC":
         kernel_layout = "HWOI"
@@ -532,16 +522,14 @@ def _convert_separable_convolution(inexpr, keras_layer, etab, input_shape=None,
     else:
         act_type = keras_layer.activation.__name__
     if act_type != "linear":
-        out = _convert_activation(out, act_type, etab)
+        out = _convert_activation(out, act_type, etab, data_layout)
     return out
 
 
 def _convert_flatten(
-    inexpr, keras_layer, etab, input_shape=None, data_layout=None
+    inexpr, keras_layer, etab, data_layout, input_shape=None
 ):  # pylint: disable=unused-argument
     _check_data_format(keras_layer)
-    if data_layout is None:
-        data_layout = etab.data_layout
 
     # NCHW -> NHWC so that dense can be correctly converted
     if data_layout == "NCHW":
@@ -550,11 +538,9 @@ def _convert_flatten(
 
 
 def _convert_pooling(
-    inexpr, keras_layer, etab, input_shape=None, data_layout=None
+    inexpr, keras_layer, etab, data_layout, input_shape=None
 ):  # pylint: disable=unused-argument
     _check_data_format(keras_layer)
-    if data_layout is None:
-        data_layout = etab.data_layout
     pool_type = type(keras_layer).__name__
     # global pool in keras = global pool + flatten in relay
 
@@ -565,11 +551,13 @@ def _convert_pooling(
 
     if pool_type == "GlobalMaxPooling2D":
         return _convert_flatten(
-            _op.nn.global_max_pool2d(inexpr, **global_pool_params), keras_layer, etab
+            _op.nn.global_max_pool2d(inexpr, **global_pool_params),
+            keras_layer, etab, data_layout
         )
     if pool_type == "GlobalAveragePooling2D":
         return _convert_flatten(
-            _op.nn.global_avg_pool2d(inexpr, **global_pool_params), keras_layer, etab
+            _op.nn.global_avg_pool2d(inexpr, **global_pool_params),
+            keras_layer, etab, data_layout
         )
     pool_h, pool_w = keras_layer.pool_size
     stride_h, stride_w = keras_layer.strides
@@ -601,13 +589,13 @@ def _convert_pooling(
     )
 
 
-def _convert_pooling3d(inexpr, keras_layer, etab, input_shape=None, data_layout=None):
+def _convert_pooling3d(
+    inexpr, keras_layer, etab, data_layout, input_shape=None
+):  # pylint: disable=unused-argument
     _check_data_format(keras_layer)
     pool_type = type(keras_layer).__name__
     if input_shape is None:
         input_shape = keras_layer.input_shape
-    if data_layout is None:
-        data_layout = etab.data_layout
 
     if pool_type not in ["MaxPooling3D", "AveragePooling3D"]:
         raise tvm.error.OpNotImplemented(
@@ -649,12 +637,9 @@ def _convert_pooling3d(inexpr, keras_layer, etab, input_shape=None, data_layout=
 
 
 def _convert_global_pooling3d(
-    inexpr, keras_layer, etab, input_shape=None, data_layout=None
+    inexpr, keras_layer, etab, data_layout, input_shape=None
 ):  # pylint: disable=unused-argument
     _check_data_format(keras_layer)
-    if data_layout is None:
-        data_layout = etab.data_layout
-
     pool_type = type(keras_layer).__name__
 
     global_pool_params = {"layout": data_layout}
@@ -671,11 +656,9 @@ def _convert_global_pooling3d(
 
 
 def _convert_upsample(
-    inexpr, keras_layer, etab, input_shape=None, data_layout=None
+    inexpr, keras_layer, etab, data_layout, input_shape=None
 ):  # pylint: disable=unused-argument
     _check_data_format(keras_layer)
-    if data_layout is None:
-        data_layout = etab.data_layout
     upsample_type = type(keras_layer).__name__
     params = {}
     if upsample_type == "UpSampling1D":
@@ -704,11 +687,9 @@ def _convert_upsample(
 
 
 def _convert_upsample3d(
-    inexpr, keras_layer, etab, input_shape=None, data_layout=None
+    inexpr, keras_layer, etab, data_layout, input_shape=None
 ):  # pylint: disable=unused-argument
     _check_data_format(keras_layer)
-    if data_layout is None:
-        data_layout = etab.data_layout
 
     params = {}
     d, h, w = keras_layer.size
@@ -721,7 +702,7 @@ def _convert_upsample3d(
 
 
 def _convert_cropping(
-    inexpr, keras_layer, etab, input_shape=None, data_layout=None
+    inexpr, keras_layer, etab, data_layout, input_shape=None
 ):  # pylint: disable=unused-argument
     _check_data_format(keras_layer)
     crop_type = type(keras_layer).__name__
@@ -742,9 +723,7 @@ def _convert_cropping(
     )
 
 
-def _convert_batchnorm(inexpr, keras_layer, etab, input_shape=None, data_layout=None):
-    if data_layout is None:
-        data_layout = etab.data_layout
+def _convert_batchnorm(inexpr, keras_layer, etab, data_layout, input_shape=None):
     if input_shape is None:
         input_shape = keras_layer.input_shape
     if data_layout == "NCHW" or len(input_shape) < 4:
@@ -780,11 +759,9 @@ def _convert_batchnorm(inexpr, keras_layer, etab, input_shape=None, data_layout=
 
 
 def _convert_padding(
-    inexpr, keras_layer, etab, input_shape=None, data_layout=None
+    inexpr, keras_layer, etab, data_layout, input_shape=None
 ):  # pylint: disable=unused-argument
     _check_data_format(keras_layer)
-    if data_layout is None:
-        data_layout = etab.data_layout
 
     padding_type = type(keras_layer).__name__
     padding = keras_layer.padding
@@ -814,11 +791,9 @@ def _convert_padding(
 
 
 def _convert_padding3d(
-    inexpr, keras_layer, etab, input_shape=None, data_layout=None
+    inexpr, keras_layer, etab, data_layout, input_shape=None
 ):  # pylint: disable=unused-argument
     _check_data_format(keras_layer)
-    if data_layout is None:
-        data_layout = etab.data_layout
 
     padding = keras_layer.padding
 
@@ -861,13 +836,11 @@ def _convert_padding3d(
 
 
 def _convert_concat(
-    inexpr, keras_layer, etab, input_shape=None, data_layout=None
+    inexpr, keras_layer, etab, data_layout, input_shape=None
 ):  # pylint: disable=unused-argument
     _check_data_format(keras_layer)
     if input_shape is None:
         input_shape = keras_layer.input_shape
-    if data_layout is None:
-        data_layout = etab.data_layout
 
     if data_layout == "NHWC" or len(input_shape[0]) < 4:
         axis = -1
@@ -876,12 +849,12 @@ def _convert_concat(
     return _op.concatenate(_as_list(inexpr), axis=axis)
 
 
-def _convert_reshape(inexpr, keras_layer, etab, input_shape=None, data_layout=None):
+def _convert_reshape(
+    inexpr, keras_layer, etab, data_layout, input_shape=None
+):  # pylint: disable=unused-argument
     _check_data_format(keras_layer)
     if input_shape is None:
         input_shape = keras_layer.input_shape
-    if data_layout is None:
-        data_layout = etab.data_layout
 
     inshape = input_shape  # includes batch
     tshape = keras_layer.target_shape  # no batch
@@ -912,7 +885,7 @@ def _convert_reshape(inexpr, keras_layer, etab, input_shape=None, data_layout=No
 
 
 def _convert_lstm(
-    inexpr, keras_layer, etab, input_shape=None, data_layout=None
+    inexpr, keras_layer, etab, data_layout, input_shape=None
 ):  # pylint: disable=unused-argument
     _check_data_format(keras_layer)
     if input_shape is None:
@@ -942,16 +915,18 @@ def _convert_lstm(
         gates = _op.split(gate, indices_or_sections=4, axis=1)
         in_gate = _convert_recurrent_activation(gates[0], keras_layer)
         in_transform = _convert_recurrent_activation(gates[1], keras_layer)
-        next_c = in_transform * next_c + in_gate * _convert_activation(gates[2], keras_layer, None)
+        next_c = in_transform * next_c + in_gate * _convert_activation(
+            gates[2], keras_layer, etab, data_layout
+        )
         out_gate = _convert_recurrent_activation(gates[3], keras_layer)
-        next_h = out_gate * _convert_activation(next_c, keras_layer, None)
+        next_h = out_gate * _convert_activation(next_c, keras_layer, etab, data_layout)
     out_shape = tuple(dim if dim else 1 for dim in _as_list(keras_layer.output_shape)[0])
     out = _op.reshape(next_h, newshape=out_shape)
     return [out, next_h, next_c]
 
 
 def _convert_simple_rnn(
-    inexpr, keras_layer, etab, input_shape=None, data_layout=None
+    inexpr, keras_layer, etab, data_layout, input_shape=None
 ):  # pylint: disable=unused-argument
     _check_data_format(keras_layer)
     if not isinstance(inexpr, list):
@@ -970,14 +945,14 @@ def _convert_simple_rnn(
     prev_op = _op.nn.batch_flatten(prev_op)
     ixh2 = _op.nn.dense(prev_op, recurrent_weight, units=units)
     output = ixh + ixh2
-    output = _convert_activation(output, keras_layer, None)
+    output = _convert_activation(output, keras_layer, etab, data_layout)
     out_shape = tuple(dim if dim else 1 for dim in _as_list(keras_layer.output_shape)[0])
     output = _op.reshape(output, newshape=out_shape)
     return [output, output]
 
 
 def _convert_gru(
-    inexpr, keras_layer, etab, input_shape=None, data_layout=None
+    inexpr, keras_layer, etab, data_layout, input_shape=None
 ):  # pylint: disable=unused-argument
     _check_data_format(keras_layer)
     if not isinstance(inexpr, list):
@@ -1013,7 +988,7 @@ def _convert_gru(
     rec_act_r = _convert_recurrent_activation(x_r + recurrent_r, keras_layer)
     units = keras_layer.units
     recurrent_h = _op.nn.dense(rec_act_r * h_tm1_op, rec_weights[1], units=units)
-    act_hh = _convert_activation(x_h + recurrent_h, keras_layer, None)
+    act_hh = _convert_activation(x_h + recurrent_h, keras_layer, etab, data_layout)
     # previous and candidate state mixed by update gate
     output = rec_act_z * h_tm1_op + (_expr.const(1.0, dtype="float32") - rec_act_z) * act_hh
     out_shape = tuple(dim if dim else 1 for dim in _as_list(keras_layer.output_shape)[0])
@@ -1022,7 +997,7 @@ def _convert_gru(
 
 
 def _convert_repeat_vector(
-    inexpr, keras_layer, etab, input_shape=None, data_layout=None
+    inexpr, keras_layer, etab, data_layout, input_shape=None
 ):  # pylint: disable=unused-argument
     if input_shape is None:
         input_shape = keras_layer.input_shape
@@ -1035,14 +1010,12 @@ def _convert_repeat_vector(
     return out
 
 
-def _convert_time_distributed(inexpr, keras_layer, etab, input_shape=None, data_layout=None):
+def _convert_time_distributed(inexpr, keras_layer, etab, data_layout, input_shape=None):
     # TimeDistributed: split input tensor along the second dimension (assumed to be time),
     # apply inner layer to each split individually,
     # and then combine the results
     if input_shape is None:
         input_shape = keras_layer.input_shape
-    if data_layout is None:
-        data_layout = etab.data_layout
 
     assert len(input_shape) >= 2, "Input to TimeDistributed must have at least two dimensions"
 
@@ -1050,7 +1023,7 @@ def _convert_time_distributed(inexpr, keras_layer, etab, input_shape=None, data_
     inner_input_shape = [d for (i, d) in enumerate(input_shape) if i != 1]
 
     # for NDHWC, inner data layout will drop the D
-    inner_data_layout = None
+    inner_data_layout = data_layout
     if data_layout == "NDHWC":
         inner_data_layout = "NHWC"
 
@@ -1065,7 +1038,7 @@ def _convert_time_distributed(inexpr, keras_layer, etab, input_shape=None, data_
         )
 
     conversion_func = lambda expr: _convert_map[inner_layer_op_name](
-        expr, inner_layer, etab, input_shape=inner_input_shape, data_layout=inner_data_layout
+        expr, inner_layer, etab, inner_data_layout, input_shape=inner_input_shape
     )
 
     split_dim = input_shape[1]
@@ -1095,7 +1068,7 @@ def _convert_time_distributed(inexpr, keras_layer, etab, input_shape=None, data_
     return _expr.Let(split_var, split_input.astuple(), _op.stack(splits, axis=1))
 
 
-def _default_skip(inexpr, keras_layer, _):  # pylint: disable=unused-argument
+def _default_skip(inexpr, keras_layer, etab, data_layout):  # pylint: disable=unused-argument
     """Layers that can be skipped because they are train time only."""
     return inexpr
 
@@ -1181,7 +1154,7 @@ def _check_unsupported_layers(model):
         )
 
 
-def keras_op_to_relay(inexpr, keras_layer, outname, etab):
+def keras_op_to_relay(inexpr, keras_layer, outname, etab, data_layout):
     """Convert a Keras layer to a Relay expression and update the expression table.
 
     Parameters
@@ -1197,13 +1170,16 @@ def keras_op_to_relay(inexpr, keras_layer, outname, etab):
 
     etab : relay.frontend.common.ExprTable
         The global expression table to be updated.
+
+    data_layout : str
+        The input data layout ("NCHW", "NHWC", or "NDHWC").
     """
     op_name = type(keras_layer).__name__
     if op_name not in _convert_map:
         raise tvm.error.OpNotImplemented(
             "Operator {} is not supported for frontend Keras.".format(op_name)
         )
-    outs = _convert_map[op_name](inexpr, keras_layer, etab)
+    outs = _convert_map[op_name](inexpr, keras_layer, etab, data_layout)
     outs = _as_list(outs)
     for t_idx, out in enumerate(outs):
         name = outname + ":" + str(t_idx)
@@ -1271,7 +1247,6 @@ def _convert_input_layer(keras_layer):
     etab = ExprTable()
     # Set global data format.
     assert layout in ["NCHW", "NHWC", "NDHWC"], "Layout must be one of 'NCHW', NHWC or NDHWC"
-    etab.data_layout = layout
     for keras_layer in model.layers:
         if isinstance(keras_layer, input_layer_class):
             _convert_input_layer(keras_layer)
@@ -1326,7 +1301,8 @@ def _convert_input_layer(keras_layer):
                     inexpr.append(expr)
                 if len(inexpr) == 1:
                     inexpr = inexpr[0]
-                keras_op_to_relay(inexpr, keras_layer, keras_layer.name + ":" + str(node_idx), etab)
+                keras_op_to_relay(inexpr, keras_layer, keras_layer.name + ":" + str(node_idx),
+                                  etab, layout)
     # model._output_coordinates contains out_node(oc[0]), node_index(oc[1]) and tensor_index(oc[2])
     # Get all output nodes in etab using the name made from above values.
     # The out exprs were added to etab in keras_op_to_relay using this name.