-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmodel.py
84 lines (68 loc) · 3.84 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
from model_kaggle import create_model_kaggle
from losses import *
def create_core_model(shape_in: tuple, n_output_dim: int, emb_dim: int = 256) -> (tf.keras.models.Model, str, str):
    """ Build the core model f() from the Kaggle model and report its boundary layers
    Args:
        shape_in (tuple): Input dimensions (excluding batch dimension)
        n_output_dim (int): Number of output classes
        emb_dim (int): Embedding dimension size
    Returns:
        model (tf.keras.models.Model): Core model as returned by create_model_kaggle()
        first_layer_after_input_label (str): Name of the layer at index 1, i.e. the first layer after the input layer
        emb_layer_label (str): Name of the layer at index -3, which is treated as the embedding layer
    """
    # the Kaggle model is used unchanged as the core model
    model = create_model_kaggle(shape_in=shape_in, num_output_dim=n_output_dim, emb_dim=emb_dim)

    # index 1 -> first layer after the input layer,
    # index -3 -> pre-final layer that is considered as the embedding layer
    core_layers = model.layers
    first_layer_after_input_label, emb_layer_label = core_layers[1].name, core_layers[-3].name

    print(f"Create core model starting from layer {first_layer_after_input_label} "
          f"to layer {emb_layer_label} of the Kaggle model")

    return model, first_layer_after_input_label, emb_layer_label
def create_model(shape_in: tuple,
                 n_output_dim: int,
                 do_masking: bool = False,
                 emb_dim: int = 256,
                 inference: bool = False) -> tf.keras.models.Model:
    """ Assemble the dual-input / dual-output model used for disentanglement learning
    The model takes a spectrogram patch and an embedding mask, multiplies the (layer-normalized) embedding
    with the mask and feeds the result into two softmax heads ('asc' and 'dc').
    Args:
        shape_in (tuple): Input feature shape including the leading batch dimension; shape_in[0] is used as
                          batch size and shape_in[1:] as per-example shape
        n_output_dim (int): Number of output classes, forwarded to the core model.
                            NOTE: the 'asc' head below is fixed to 10 units and the 'dc' head to 3 units,
                            independent of this value.
        do_masking (bool): If True, the embedding size is doubled (used if embedding masking is applied)
        emb_dim (int): Embedding size (before the optional doubling)
        inference (bool): If True, the input layers are created without a fixed batch size (inference mode);
                          if False, the batch size shape_in[0] is fixed in the input layers (training mode)
    Returns:
        disent_model (tf.keras.models.Model): Model with inputs [spectrogram, embedding mask] and
                                              outputs [asc prediction, dc prediction]
    """
    # embedding masking requires twice the embedding size
    emb_dim = emb_dim * 2 if do_masking else emb_dim
    feat_shape = shape_in[1:]

    # core model plus the names of the layers delimiting the part that is re-used below
    core_model, first_layer_after_input_label, emb_layer_label = create_core_model(feat_shape, n_output_dim,
                                                                                   emb_dim)

    # only the training graph gets an explicit batch size on its input layers
    input_kwargs = {} if inference else {"batch_size": shape_in[0]}

    # two input branches: spectrogram patch & embedding mask
    # (creation order is kept as is, since automatically generated layer names depend on it)
    spec_in = tf.keras.Input(shape=feat_shape, **input_kwargs)
    emb_mask_in = tf.keras.Input(shape=(emb_dim,), **input_kwargs)

    # sub-model covering the core model from its first layer after the input up to the embedding layer
    core_entry = core_model.get_layer(first_layer_after_input_label).input
    core_exit = core_model.get_layer(emb_layer_label).get_output_at(0)
    intermediate_model = tf.keras.models.Model(inputs=core_entry, outputs=core_exit, name="core")

    # embedding -> layer normalization -> element-wise masking
    emb = tf.keras.layers.LayerNormalization(name="emb")(intermediate_model(spec_in))
    x_m = tf.keras.layers.multiply((emb, emb_mask_in))

    # output heads: acoustic scene prediction (10 classes) and domain classification (3 classes)
    out_asc = tf.keras.layers.Dense(10, activation='softmax', name='asc')(x_m)
    out_dc = tf.keras.layers.Dense(3, activation='softmax', name='dc')(x_m)

    # connect everything to the dual-input-dual-output model
    return tf.keras.models.Model(inputs=[spec_in, emb_mask_in], outputs=[out_asc, out_dc])