I want to combine a convolutional layer with a recurrent one. This code is based on #19, and it works.
Questions:
I stumbled across the same problem as "Possible Conv1D and Max1D Issue" (#6) at first, then I replaced kann_layer_input with kad_feed(3, 1, 1, 28) to make it work, but the numbers 1, 1 still look like magic to me. Are they correct? My current reading of them is sketched below.
Does backprop work correctly for conv1d on an unrolled RNN?
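For context, here is how I currently read that feed shape; the dimension meanings below are my assumption, and part of what I am asking to confirm:

/* My assumption about kad_feed(3, 1, 1, 28):
 *   dim 0 = 1  -> mini-batch size placeholder (later overridden by kann_set_batch_size)
 *   dim 1 = 1  -> number of input channels seen by conv1d
 *   dim 2 = 28 -> length of the 1-D signal, i.e. one 28-pixel MNIST row per time step
 */
kad_node_t *t = kad_feed(3, 1, 1, 28);
t->ext_flag |= KANN_F_IN;

Regarding the second question: the training loop below has a commented-out kad_check_grad(ua->n, ua->v, ua->n-1) call, which I assume is the intended way to check the gradients numerically.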
Whole code:
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "kann_extra/kann_data.h"
#include "kann.h"

typedef struct {
    int n_in, n_out, ulen, n; // input/output size per step, unroll length (time steps), number of samples
    float **x, **y;
} train_data;
static void train(kann_t *ann, train_data *d, float lr, int mini_size, int max_epoch, const char *fn, int n_threads)
{
    float **x, **y, *r, best_cost = 1e30f;
    int epoch, j, n_var, *shuf;
    kann_t *ua;

    n_var = kann_size_var(ann);
    r = (float*)calloc(n_var, sizeof(float));
    x = (float**)malloc(d->ulen * sizeof(float*)); // one input buffer per unrolled time step
    y = (float**)malloc(1 * sizeof(float*));       // a single truth buffer: only the last step is classified
    for (j = 0; j < d->ulen; ++j) {
        x[j] = (float*)calloc(mini_size * d->n_in, sizeof(float));
    }
    y[0] = (float*)calloc(mini_size * d->n_out, sizeof(float));
    shuf = (int*)calloc(d->n, sizeof(int));

    ua = kann_unroll(ann, d->ulen); // unroll the RNN over d->ulen time steps
    kann_set_batch_size(ua, mini_size);
    kann_mt(ua, n_threads, mini_size);
    kann_feed_bind(ua, KANN_F_IN, 0, x);
    kann_feed_bind(ua, KANN_F_TRUTH, 0, y);
    kann_switch(ua, 1); // training mode

    for (epoch = 0; epoch < max_epoch; ++epoch) {
        kann_shuffle(d->n, shuf);
        double cost = 0.0;
        int tot = 0, tot_base = 0, n_cerr = 0;
        for (j = 0; j < d->n - mini_size; j += mini_size) {
            int b, k;
            for (b = 0; b < mini_size; ++b) { // assemble one mini-batch: x[k] holds time step k for all samples
                int s = shuf[j + b];
                for (k = 0; k < d->ulen; ++k) {
                    memcpy(&x[k][b * d->n_in], &d->x[s][k * d->n_in], d->n_in * sizeof(float));
                }
                memcpy(&y[0][b * d->n_out], d->y[s], d->n_out * sizeof(float));
            }
            cost += kann_cost(ua, 0, 1) * d->ulen * mini_size;
            n_cerr += kann_class_error(ua, &k);
            tot_base += k;
            //kad_check_grad(ua->n, ua->v, ua->n-1);
            kann_RMSprop(n_var, lr, 0, 0.9f, ua->g, ua->x, r);
            tot += d->ulen * mini_size;
        }
        if (cost < best_cost) {
            best_cost = cost;
            if (fn) kann_save(fn, ann);
        }
        fprintf(stderr, "epoch: %d; cost: %g (class error: %.2f%%)\n", epoch+1, cost / tot, 100.0f * n_cerr / tot_base);
    }

    kann_delete_unrolled(ua);
    for (j = 0; j < d->ulen; ++j) {
        free(x[j]);
    }
    free(y[0]); free(y); free(x); free(r); free(shuf);
}
static train_data* create_train_data(kann_t *ann, kann_data_t *x, kann_data_t *y)
{
    train_data *d;
    d = (train_data*)malloc(sizeof(*d));
    assert(d);
    assert(x->n_row == y->n_row);
    d->x = x->x;
    d->y = y->x;
    d->ulen = 28; // 28x28: one 28-pixel row per time step
    d->n = x->n_row;
    d->n_in = kann_dim_in(ann);
    d->n_out = kann_dim_out(ann);
    return d;
}
int main(int argc, char *argv[])
{
    kann_t *ann;
    kann_data_t *x, *y;
    char *fn_in = 0, *fn_out = 0;
    int c, i, mini_size = 64, max_epoch = 50, seed = 84, n_h_layers = 1, n_h_neurons = 64, norm = 1, n_h_flt = 32, n_threads = 1;
    float lr = 0.001f, dropout = 0.2f;

    while ((c = getopt(argc, argv, "i:o:m:l:n:d:s:t:N")) >= 0) {
        if (c == 'i') fn_in = optarg;
        else if (c == 'o') fn_out = optarg;
        else if (c == 'm') max_epoch = atoi(optarg);
        else if (c == 'l') n_h_layers = atoi(optarg);
        else if (c == 'n') n_h_neurons = atoi(optarg);
        else if (c == 'd') dropout = atof(optarg);
        else if (c == 's') seed = atoi(optarg);
        else if (c == 't') n_threads = atoi(optarg);
        else if (c == 'N') norm = 0;
    }
    if (argc - optind == 0 || (argc - optind == 1 && fn_in == 0)) {
        FILE *fp = stdout;
        fprintf(fp, "Usage: mnist-cnn [-i model] [-o model] [-t nThreads] <x.knd> [y.knd]\n");
        return 1;
    }

    kad_trap_fe();
    kann_srand(seed);
    if (fn_in) {
        ann = kann_load(fn_in);
    } else {
        kad_node_t *t;
        int rnn_flag = KANN_RNN_VAR_H0;
        if (norm) rnn_flag |= KANN_RNN_NORM;
        t = kad_feed(3, 1, 1, 28), t->ext_flag |= KANN_F_IN; // the 1, 1, 28 here are the numbers asked about above
        t = kad_relu(kann_layer_conv1d(t, 32, 3, 1, 0)); // 32 filters; 3 kernel; 1 stride; 0 padding
        t = kann_layer_dropout(t, dropout);
        t = kad_max1d(t, 2, 2, 0); // 2 kernel; 2 stride; 0 padding
        for (i = 0; i < n_h_layers; ++i) {
            t = kann_layer_gru(t, n_h_neurons, rnn_flag);
            t = kann_layer_dropout(t, dropout);
        }
        t = kad_select(1, &t, -1); // classify on the output of the last time step only
        ann = kann_new(kann_layer_cost(t, 10, KANN_C_CEB), 0);
        kad_print_graph(stdout, ann->n, ann->v);
    }

    x = kann_data_read(argv[optind]);
    assert(x->n_col == 28 * 28);
    y = argc - optind >= 2? kann_data_read(argv[optind+1]) : 0;
    if (y) { // training
        assert(y->n_col == 10);
        if (n_threads > 1) kann_mt(ann, n_threads, mini_size);
        train_data *d;
        d = create_train_data(ann, x, y);
        train(ann, d, lr, mini_size, max_epoch, fn_out, n_threads);
        free(d);
        kann_data_free(y);
    } else { // applying
        int i, j, k, n_out;
        kann_switch(ann, 0);
        n_out = kann_dim_out(ann);
        assert(n_out == 10);
        for (i = 0; i < x->n_row; ++i) {
            const float *y;
            kann_rnn_start(ann);
            for (k = 0; k < 28; ++k) { // feed one 28-pixel row per time step
                float x1[28];
                memcpy(x1, &x->x[i][k * 28], sizeof(x1));
                y = kann_apply1(ann, x1);
            }
            if (x->rname) printf("%s\t", x->rname[i]);
            for (j = 0; j < n_out; ++j) {
                if (j) putchar('\t');
                printf("%.3g", y[j] + 1.0f - 1.0f); // +1-1 flushes tiny values and -0 for cleaner output
            }
            putchar('\n');
            kann_rnn_end(ann);
        }
    }
    kann_data_free(x);
    kann_delete(ann);
    return 0;
}
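In case it matters, this is how I invoke it; the binary name and the .knd file names are just what I use locally (the options map to the getopt flags in main):

./mnist-cnn-rnn -o mnist-rnn.kan -m 20 -t 4 mnist-train-x.knd mnist-train-y.knd    # training
./mnist-cnn-rnn -i mnist-rnn.kan mnist-test-x.knd > predictions.txt                # applying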