# draw.py

from model import Model

import tensorflow as tf


def linear(x, output_dim):
    """Apply an affine map ``x @ w + b`` with variables from the active scope.

    The variables are obtained through ``tf.get_variable`` under the fixed
    names ``"w"`` and ``"b"``, so the caller's ``tf.variable_scope`` decides
    whether they are created or reused.

    Args:
        x: Tensor whose second dimension is statically known; it is used as
            the number of rows of the weight matrix.
        output_dim: Number of output features.

    Returns:
        The tensor ``tf.matmul(x, w) + b``.
    """
    input_dim = x.get_shape()[1]
    weights = tf.get_variable("w", [input_dim, output_dim])
    # The bias starts at zero; the weights use the scope's default initializer.
    bias = tf.get_variable(
        "b", [output_dim], initializer=tf.constant_initializer(0.0))
    return tf.matmul(x, weights) + bias


class DRAW(Model):
    """DRAW recurrent variational auto-encoder, unrolled as a TF graph.

    For ``T`` steps the network reads from the input (optionally through an
    attention window), runs an encoder LSTM, samples a latent vector, runs a
    decoder LSTM and adds the decoder's write to a running canvas.

    This class uses ``self.config``, ``self.batch_size``, ``self.img_size``,
    ``self.A``, ``self.B``, ``self.n_chan``, ``self.is_gaussian`` and
    ``self.x_in`` without defining them; they are presumably provided by
    ``Model`` -- confirm against the base class.
    """

    def __init__(self, config):
        super().__init__(config)

    def _create_network(self):
        """Read the hyper-parameters from ``self.config`` and build the graph."""
        cfg = self.config
        self.T = cfg['T']
        self.n_z = cfg['n_z']
        self.enc_size = cfg['enc_size']
        self.dec_size = cfg['dec_size']
        self.read_n = cfg['read_n']
        self.write_n = cfg['write_n']
        self.read_attn = cfg['read_attn']
        self.write_attn = cfg['write_attn']

        # NOTE(review): the attention read actually yields
        # 2 * read_n**2 * n_chan features; read_size is not used inside this
        # class, so its value is left unchanged.
        if self.read_attn:
            self.read_size = 2 * self.read_n * self.read_n
        else:
            self.read_size = 2 * self.img_size

        # Per-channel size of a write patch (multiplied by n_chan on use).
        if self.write_attn:
            self.write_size = self.write_n * self.write_n
        else:
            self.write_size = self.img_size

        self.z_size = self.T * self.n_z

        # Passed as ``reuse=`` to every variable scope: None while the first
        # time step creates the variables, True afterwards.
        self.DO_SHARE = None

        # Standard-normal noise for the reparameterisation trick.
        self.e = tf.random_normal(
            (self.batch_size, self.n_z),
            mean=0,
            stddev=1
        )

        self.lstm_enc = tf.contrib.rnn.LSTMCell(
            self.enc_size, state_is_tuple=True)  # encoder Op
        self.lstm_dec = tf.contrib.rnn.LSTMCell(
            self.dec_size, state_is_tuple=True)  # decoder Op

        # Select the read / write operators once, based on the config.
        self._f_write = (
            self._f_write_attn if self.write_attn else self._f_write_no_attn)
        self._f_read = (
            self._f_read_attn if self.read_attn else self._f_read_no_attn)

        self._build_graph()

    def _build_graph(self):
        """Unroll the model for ``T`` steps and expose the resulting tensors.

        Sets ``self.mus``, ``self.logvars`` and ``self.sigmas`` (one entry per
        step), ``self.z_mean`` and ``self.z_log_var`` (per-step values laid
        out side by side for each batch item) and ``self.x_reconstr_mean``
        (plus ``self.m``, ``self.s`` and ``self.x_reconstr_log_var`` when
        ``self.is_gaussian`` is true).
        """
        cs = [0] * self.T  # sequence of canvases

        # Gaussian params generated by _sampleQ; needed for the latent loss.
        self.mus = [0] * self.T
        self.logvars = [0] * self.T
        self.sigmas = [0] * self.T

        # initial states
        h_dec_prev = tf.zeros((self.batch_size, self.dec_size))
        enc_state = self.lstm_enc.zero_state(self.batch_size, tf.float32)
        dec_state = self.lstm_dec.zero_state(self.batch_size, tf.float32)

        # A Gaussian canvas carries two halves that are split apart below.
        canvas_dim = 2 * self.img_size if self.is_gaussian else self.img_size

        # construct the unrolled computational graph
        for t in range(self.T):
            if t == 0:
                c_prev = tf.zeros((self.batch_size, canvas_dim))
            else:
                c_prev = cs[t - 1]

            if self.is_gaussian:
                m, _ = tf.split(c_prev, 2, 1)
                x_hat = self.x_in - m  # error image
            else:
                x_hat = self.x_in - tf.sigmoid(c_prev)  # error image

            r = self._f_read(self.x_in, x_hat, h_dec_prev)
            h_enc, enc_state = self._encode(
                enc_state, tf.concat([r, h_dec_prev], 1))
            z, self.mus[t], self.logvars[t], self.sigmas[t] = \
                self._sampleQ(h_enc)
            h_dec, dec_state = self._decode(dec_state, z)
            cs[t] = c_prev + self._f_write(h_dec)  # store results
            h_dec_prev = h_dec
            self.DO_SHARE = True  # from now on, share variables

        # Stack the per-step posterior parameters: (T, batch, n_z) is
        # transposed to (batch, T, n_z) and flattened per batch item.
        self.z_mean = tf.reshape(
            tf.transpose(tf.convert_to_tensor(self.mus), [1, 0, 2]),
            [self.batch_size, -1])
        self.z_log_var = tf.reshape(
            tf.transpose(tf.convert_to_tensor(self.logvars), [1, 0, 2]),
            [self.batch_size, -1])

        if self.is_gaussian:
            self.m, self.s = tf.split(cs[-1], 2, 1)
            self.x_reconstr_mean = self.m
            self.x_reconstr_log_var = self.s
        else:
            self.x_reconstr_mean = tf.sigmoid(cs[-1])

    def _compute_latent_loss(self):
        """Return the latent (KL) loss, averaged over the minibatch.

        Each step contributes the closed-form KL divergence between the
        diagonal Gaussian ``N(mu_t, sigma_t**2)`` and the standard normal,
        ``0.5 * sum(mu**2 + sigma**2 - log(sigma**2) - 1)``, summed over the
        latent dimension. The per-step terms are added over the ``T`` steps.
        """
        kl_terms = []
        for t in range(self.T):
            mu2 = tf.square(self.mus[t])
            sigma2 = tf.square(self.sigmas[t])
            logvar = self.logvars[t]
            # The -1 belongs to every latent dimension, so it sits inside the
            # sum; this makes the KL exactly zero when mu = 0 and sigma = 1.
            kl_terms.append(
                0.5 * tf.reduce_sum(mu2 + sigma2 - logvar - 1.0, 1))

        # one value per batch item, summed over the T steps
        KL = tf.add_n(kl_terms)

        return tf.reduce_mean(KL)  # average over the minibatch

    def _attn_window(self, scope, h_dec, N):
        """Compute the attention filterbanks and intensity from ``h_dec``.

        Args:
            scope: Variable scope name holding the linear layer that emits
                the five attention parameters.
            h_dec: Decoder output the parameters are computed from.
            N: Grid size of the window. Must be at least 2, because the
                stride divides by ``N - 1``.

        Returns:
            Tuple ``(Fx, Fy, gamma)`` where ``Fx`` and ``Fy`` come from
            ``filterbank`` and ``gamma`` is ``exp(log_gamma)``.
        """
        with tf.variable_scope(scope, reuse=self.DO_SHARE):
            params = linear(h_dec, 5)

        gx_, gy_, log_sigma2, log_delta, log_gamma = tf.split(params, 5, 1)
        gx = ((self.A + 1) / 2) * (gx_ + 1)
        gy = ((self.B + 1) / 2) * (gy_ + 1)
        sigma2 = tf.exp(log_sigma2)
        delta = ((max(self.A, self.B) - 1) / (N - 1)
                 * tf.exp(log_delta))  # batch x N
        return self.filterbank(gx, gy, sigma2, delta, N) + (tf.exp(log_gamma),)

    def filterbank(self, gx, gy, sigma2, delta, N, eps=1e-8):
        """Build the normalised Gaussian filterbanks for an ``N x N`` window.

        Args:
            gx, gy: Window centre coordinates, one per batch item.
            sigma2: Variance of the Gaussian filters, one per batch item.
            delta: Stride between neighbouring filter centres.
            N: Number of filters along each axis.
            eps: Lower bound on the normaliser, to avoid dividing by zero.

        Returns:
            Tuple ``(Fx, Fy)`` with shapes ``(batch, N, A)`` and
            ``(batch, N, B)``; every filter sums to one unless its raw mass
            is below ``eps``.
        """
        # NOTE(review): grid_i is 0-based while the "- N / 2 - 0.5" offset
        # looks written for a 1-based index, so the grid sits one stride
        # left of gx/gy. Left unchanged -- confirm the intent.
        grid_i = tf.reshape(tf.cast(tf.range(N), tf.float32), [1, -1])
        mu_x = gx + (grid_i - N / 2 - 0.5) * delta  # eq 19
        mu_y = gy + (grid_i - N / 2 - 0.5) * delta  # eq 20
        a = tf.reshape(tf.cast(tf.range(self.A), tf.float32), [1, 1, -1])
        b = tf.reshape(tf.cast(tf.range(self.B), tf.float32), [1, 1, -1])
        mu_x = tf.reshape(mu_x, [-1, N, 1])
        mu_y = tf.reshape(mu_y, [-1, N, 1])
        sigma2 = tf.reshape(sigma2, [-1, 1, 1])
        # Gaussian kernel exp(-(a - mu)^2 / (2 * sigma^2)): only the distance
        # is squared, so sigma2 really is the variance of the filter.
        Fx = tf.exp(-tf.square(a - mu_x) / (2 * sigma2))  # batch x N x A
        Fy = tf.exp(-tf.square(b - mu_y) / (2 * sigma2))  # batch x N x B
        # normalize, sum over A and B dims
        Fx = Fx / tf.maximum(tf.reduce_sum(Fx, 2, keep_dims=True), eps)
        Fy = Fy / tf.maximum(tf.reduce_sum(Fy, 2, keep_dims=True), eps)
        return Fx, Fy

    def _sampleQ(self, h_enc):
        """Sample ``z_t`` from the posterior with the reparameterisation trick.

        Args:
            h_enc: Encoder output for the current step.

        Returns:
            Tuple ``(z, mu, logvar, sigma)`` where ``mu`` and ``logvar`` are
            linear functions of ``h_enc`` with ``n_z`` outputs,
            ``sigma = exp(0.5 * logvar)`` and ``z = mu + sigma * self.e``.
        """
        # NOTE(review): self.e is one noise tensor created in
        # _create_network, so every time step of a run shares the same
        # noise draw. Confirm whether independent noise per step is wanted.
        with tf.variable_scope("mu", reuse=self.DO_SHARE):
            mu = linear(h_enc, self.n_z)
        with tf.variable_scope("sigma", reuse=self.DO_SHARE):
            logvar = linear(h_enc, self.n_z)
            sigma = tf.exp(0.5 * logvar)
        return (mu + sigma * self.e, mu, logvar, sigma)

    def _encode(self, state, input):
        """Run one step of the encoder LSTM.

        Args:
            state: Previous encoder state.
            input: Concatenation of the read vector and ``h_dec_prev``.

        Returns:
            Tuple ``(output, new_state)``.
        """
        with tf.variable_scope("encoder", reuse=self.DO_SHARE):
            return self.lstm_enc(input, state)

    def _decode(self, state, input):
        """Run one step of the decoder LSTM; returns ``(output, new_state)``."""
        with tf.variable_scope("decoder", reuse=self.DO_SHARE):
            return self.lstm_dec(input, state)

    def _f_read_no_attn(self, x, x_hat, h_dec_prev):
        """Read without attention: concatenate the image and error image."""
        return tf.concat([x, x_hat], 1)

    def _f_read_attn(self, x, x_hat, h_dec_prev):
        """Read an ``read_n x read_n`` glimpse of ``x`` and ``x_hat``.

        The window parameters are computed from ``h_dec_prev``. Both
        glimpses are flattened and concatenated along the feature axis.
        """
        Fx, Fy, gamma = self._attn_window("read", h_dec_prev, self.read_n)

        def filter_img(img, Fx, Fy, gamma, N):
            Fxt = tf.transpose(Fx, perm=[0, 2, 1])  # batch x A x N
            # Rows follow B and columns follow A, so Fy (batch x N x B) and
            # Fxt line up with the image for non-square inputs too; this is
            # the same (B, A) layout that _f_write_attn_aux writes.
            img = tf.reshape(img, [-1, self.B, self.A, self.n_chan])
            gamma = tf.reshape(gamma, [-1, 1])
            glimpse = [
                tf.matmul(Fy, tf.matmul(img[:, :, :, i], Fxt))
                for i in range(self.n_chan)
            ]
            glimpse = tf.stack(glimpse, axis=3)  # batch x N x N x n_chan
            return gamma * tf.reshape(glimpse, [-1, N * N * self.n_chan])

        # batch x (read_n*read_n*n_chan)
        x = filter_img(x, Fx, Fy, gamma, self.read_n)
        x_hat = filter_img(x_hat, Fx, Fy, gamma, self.read_n)

        return tf.concat([x, x_hat], 1)  # concat along feature axis

    def _f_write_no_attn(self, h_dec):
        """Write without attention: a linear map of ``h_dec`` to the canvas."""
        out_dim = 2 * self.img_size if self.is_gaussian else self.img_size
        with tf.variable_scope("write", reuse=self.DO_SHARE):
            return linear(h_dec, out_dim)

    def _f_write_attn_aux(self, Fx, Fy, gamma, param):
        """Project an ``N x N`` patch onto the image and scale by ``1/gamma``.

        Args:
            Fx, Fy: Filterbanks from ``_attn_window``.
            gamma: Raw (not inverted) attention intensity; the reciprocal is
                taken here.
            param: Flat patch holding ``write_n * write_n * n_chan`` values
                per batch item.

        Returns:
            Tensor of shape ``(batch_size, A * B * n_chan)``.
        """
        N = self.write_n

        param = tf.reshape(param, [self.batch_size, N, N, self.n_chan])
        Fyt = tf.transpose(Fy, perm=[0, 2, 1])  # batch x B x N
        inv_gamma = tf.reshape(1.0 / gamma, [-1, 1])

        paramrs = []
        for i in range(self.n_chan):
            paramr = tf.matmul(Fyt, tf.matmul(param[:, :, :, i], Fx))
            paramrs.append(
                tf.reshape(paramr, [self.batch_size, self.A * self.B]))

        # channels last, then flatten to one feature axis
        paramrs = tf.reshape(tf.stack(paramrs, axis=2), [self.batch_size, -1])
        return paramrs * inv_gamma

    def _f_write_attn(self, h_dec):
        """Write through an attention window computed from ``h_dec``.

        When ``self.is_gaussian`` is true the linear layer emits two patches
        that are projected separately and concatenated along the feature
        axis.
        """
        patch_dim = self.write_size * self.n_chan
        with tf.variable_scope("writeW", reuse=self.DO_SHARE):
            if self.is_gaussian:
                w = linear(h_dec, 2 * patch_dim)
            else:
                w = linear(h_dec, patch_dim)

        # Pass the raw gamma on: _f_write_attn_aux takes the reciprocal, and
        # inverting here as well would cancel the 1/gamma scaling.
        Fx, Fy, gamma = self._attn_window("write", h_dec, self.write_n)

        if not self.is_gaussian:
            return self._f_write_attn_aux(Fx, Fy, gamma, w)

        wm, ws = tf.split(w, 2, 1)
        wmrs = self._f_write_attn_aux(Fx, Fy, gamma, wm)
        wsrs = self._f_write_attn_aux(Fx, Fy, gamma, ws)
        return tf.concat([wmrs, wsrs], 1)