
Commit

Add files via upload
timlautk authored Feb 12, 2018
1 parent b5e3c34 commit 76aeeea
Showing 2 changed files with 43 additions and 36 deletions.
35 changes: 19 additions & 16 deletions bcd_dnn_mlp_mnist.m
@@ -5,9 +5,12 @@

addpath Algorithms Tools

disp('MLP with Three Hidden Layers using the MNIST dataset')

rng('default');
seed = 20;
seed = 10;
rng(seed);
fprintf('Seed = %d \n', seed)

% read in MNIST dataset into Matlab format if not exist
if exist('mnist.mat', 'file')
@@ -32,7 +35,7 @@
[~,col] = find(X(1,:) < num_classes);
X = X(:,col);
[~,N] = size(X);
X = X(:,randperm(N)); % shuffle the dataset
X = X(:,randperm(N)); % shuffle the training dataset
x_train = X(2:end,:);
y_train = X(1,:)';
clear X
@@ -51,7 +54,7 @@
[~, col_test] = find(X_test(1,:) < num_classes);
X_test = X_test(:,col_test);
[~,N_test] = size(X_test);
X_test = X_test(:,randperm(N_test,N_test));
X_test = X_test(:,randperm(N_test,N_test)); % shuffle the test dataset
x_test = X_test(2:end,:);
y_test = X_test(1,:)';
clear X_test
@@ -88,7 +91,6 @@

indicator = 1; % 0 = sign; 1 = ReLU; 2 = tanh; 3 = sigmoid

% a1 = zeros(d1,N); a2 = zeros(d2,N); a3 = zeros(d3,N);
switch indicator
case 0 % sign (binary)
a1 = sign(W1*x_train+b1); a2 = sign(W2*a1+b2); a3 = sign(W3*a2+b3);
@@ -103,22 +105,21 @@
u1 = zeros(d1,N); u2 = zeros(d2,N); u3 = zeros(d3,N);

lambda = 0;
gamma = 0.1; gamma1 = gamma; gamma2 = gamma; gamma3 = gamma; gamma4 = 0.1;
gamma = 0.1; gamma1 = gamma; gamma2 = gamma; gamma3 = gamma; gamma4 = gamma;
% alpha1 = 10;
alpha1 = 1e-3;
alpha = 1e-2;
alpha1 = 1e-1;
alpha = 1e-1;
alpha2 = alpha; alpha3 = alpha; alpha4 = alpha;
alpha5 = alpha; alpha6 = alpha; alpha7 = alpha;
alpha8 = alpha; alpha9 = alpha; alpha10 = alpha;
% alpha8 = alpha; alpha9 = alpha; alpha10 = alpha;

beta = 0.9;
beta = 0.95;
beta1 = beta; beta2 = beta; beta3 = beta; beta4 = beta;
beta5 = beta; beta6 = beta; beta7 = beta;
beta8 = beta; beta9 = beta; beta10 = beta;
% beta8 = beta; beta9 = beta; beta10 = beta;

t = 0.1;

s = 10; % number of mini-batches
% niter = input('Number of iterations: ');
niter = 30;
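
A brief gloss of the constants above, based on how they are used later in this file; the descriptions of the alpha* and t parameters are assumptions, since the update routines under Algorithms/ are not shown here:

% lambda        - Frobenius-norm regularization weight on W1, W2, W3, V (see loss2 below)
% gamma1-gamma3 - weights of the quadratic penalties ||W*a_prev + b - a + u||_F^2; gamma4 weights the output fit
% alpha*        - per-block parameters passed to updateWb_2/updatea_2, presumably proximal step sizes (assumption)
% beta*         - per-block momentum parameters adapted by the AdaptiveWb*/Adaptivea* routines
% t             - parameter of the adaptive-momentum rule (role not visible here)
% s             - number of mini-batches; niter - number of training epochs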
loss1 = zeros(niter,1);
@@ -165,7 +166,7 @@
% update W3 and b3 (3rd layer)
[W3star,b3star] = updateWb_2(a3,a2,u3,W3,b3,alpha3,gamma3,lambda);
% adaptive momentum and update
[W3,b3,beta3] = AdaptiveWb1_3(a2,a3,W3,W3star,b3,b3star,beta3,t);
[W3,b3,beta3] = AdaptiveWb1_3(lambda,gamma3,a2,a3,W3,W3star,b3,b3star,beta3,t);


% update a2
@@ -180,7 +181,7 @@
% update W2 and b2 (2nd layer)
[W2star,b2star] = updateWb_2(a2,a1,u2,W2,b2,alpha5,gamma2,lambda);
% adaptive momentum and update
[W2,b2,beta5] = AdaptiveWb1_3(a1,a2,W2,W2star,b2,b2star,beta5,t);
[W2,b2,beta5] = AdaptiveWb1_3(lambda,gamma2,a1,a2,W2,W2star,b2,b2star,beta5,t);


% update a1
@@ -194,7 +195,7 @@
% update W1 and b1 (1st layer)
[W1star,b1star] = updateWb_2(a1,x_train,u1,W1,b1,alpha7,gamma1,lambda);
% adaptive momentum and update
[W1,b1,beta7] = AdaptiveWb1_3(x_train,a1,W1,W1star,b1,b1star,beta7,t);
[W1,b1,beta7] = AdaptiveWb1_3(lambda,gamma1,x_train,a1,W1,W1star,b1,b1star,beta7,t);

% Training accuracy
switch indicator
@@ -242,13 +243,14 @@
end
[~,pred_test] = max(V*a3_test+c,[],1);

time1(k) = toc;

loss1(k) = gamma4/2*norm(V*a3+c-y_one_hot,'fro')^2;
% loss1(k) = cross_entropy(y_one_hot,a1,V,c)+lambda*norm(V,'fro')^2;
loss2(k) = loss1(k)+gamma1/2*norm(W1*x_train+b1-a1+u1,'fro')^2+gamma2/2*norm(W2*a1+b2-a2+u2,'fro')^2+gamma3/2*norm(W3*a2+b3-a3+u3,'fro')^2+lambda*(norm(W1,'fro')^2+norm(W2,'fro')^2+norm(W3,'fro')^2+norm(V,'fro')^2);
% loss1(k) = cross_entropy(y_one_hot,a1,W2,b2)+gamma1/2*norm(W1*x_train+b1-a1,'fro')^2;
accuracy_train(k) = sum(pred'-1 == y_train)/N;
accuracy_test(k) = sum(pred_test'-1 == y_test)/N_test;
time1(k) = toc;
fprintf('epoch: %d, squared loss: %f, total loss: %f, training accuracy: %f, validation accuracy: %f, time: %f\n',k,loss1(k),loss2(k),accuracy_train(k),accuracy_test(k),time1(k));

end
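
The two loss values printed each epoch follow directly from the expressions above; a minimal standalone sketch of the same computation (the helper name bcd_losses is illustrative, not part of the repository):

function [sq_loss, total_loss] = bcd_losses(x, y_one_hot, a1, a2, a3, u1, u2, u3, ...
    W1, b1, W2, b2, W3, b3, V, c, gamma1, gamma2, gamma3, gamma4, lambda)
% Squared loss on the one-hot targets (loss1 in the script).
sq_loss = gamma4/2 * norm(V*a3 + c - y_one_hot, 'fro')^2;
% Quadratic penalties tying each activation block to its affine map, plus
% Frobenius-norm regularization of the weights (loss2 in the script).
pen = gamma1/2 * norm(W1*x  + b1 - a1 + u1, 'fro')^2 ...
    + gamma2/2 * norm(W2*a1 + b2 - a2 + u2, 'fro')^2 ...
    + gamma3/2 * norm(W3*a2 + b3 - a3 + u3, 'fro')^2;
reg = lambda * (norm(W1,'fro')^2 + norm(W2,'fro')^2 + norm(W3,'fro')^2 + norm(V,'fro')^2);
total_loss = sq_loss + pen + reg;
end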
@@ -271,7 +273,8 @@
figure;
graph2 = semilogy(1:niter,accuracy_train,1:niter,accuracy_test);
set(graph2,'LineWidth',1.5);
legend('Training accuracy','Validation accuracy','Location','southeast');
% ylim([0.85 1])
legend('Training accuracy','Test accuracy','Location','southeast');
ylabel('Accuracy')
xlabel('Epochs')
title('Three-layer MLP')
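The training and test accuracies reported each epoch come from a plain feed-forward pass; a minimal sketch for the ReLU setting (indicator = 1), assuming the ReLU branch mirrors the sign branch shown earlier (the helper name mlp_predict is illustrative):

function pred = mlp_predict(x, W1, b1, W2, b2, W3, b3, V, c)
% Feed-forward prediction with ReLU activations.
a1 = max(W1*x  + b1, 0);
a2 = max(W2*a1 + b2, 0);
a3 = max(W3*a2 + b3, 0);
[~, idx] = max(V*a3 + c, [], 1);  % class index in 1..num_classes
pred = idx - 1;                   % MNIST labels are 0-based, cf. pred'-1 == y_train
end
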
44 changes: 24 additions & 20 deletions bcd_dnn_resnet_mnist.m
@@ -5,9 +5,12 @@

addpath Algorithms Tools

disp('MLP with Three Hidden Layers and a Residual Connection using the MNIST dataset')

rng('default');
seed = 20;
seed = 10;
rng(seed);
fprintf('Seed = %d \n', seed)

% read in MNIST dataset into Matlab format if not exist
if exist('mnist.mat', 'file')
@@ -117,21 +120,22 @@
lambda = 0;
gamma = 0.1; gamma1 = gamma; gamma2 = gamma; gamma3 = gamma; gamma4 = gamma; gammaL = gamma;
% alpha1 = 10;
alpha1 = 1e-5;
alpha = 1e-4;
alpha2 = alpha; alpha3 = alpha; alpha4 = alpha;
alpha5 = alpha; alpha6 = alpha; alpha7 = alpha;
alpha8 = alpha; alpha9 = alpha; alpha10 = alpha;
beta = 0.9;
alpha1 = 1;
alphao = 5;
alphae = 10;
alpha2 = alphae; alpha3 = alphao; alpha4 = alphae;
alpha5 = alphao; alpha6 = alphae; alpha7 = alphao;
% alpha8 = alpha; alpha9 = alpha; alpha10 = alpha;
beta = 0.95;
beta1 = beta; beta2 = beta; beta3 = beta; beta4 = beta;
beta5 = beta; beta6 = beta; beta7 = beta;
beta8 = beta; beta9 = beta; beta10 = beta;
% beta8 = beta; beta9 = beta; beta10 = beta;

t = 0.1;

% s = 10; % number of mini-batches
% niter = input('Number of iterations: ');
niter = 10;
niter = 20;
loss1 = zeros(niter,1);
loss2 = zeros(niter,1);
accuracy_train = zeros(niter,1);
@@ -179,10 +183,10 @@
% [W4,b4,beta3] = AdaptiveWb1_4(lambda,gamma4,a3,a4,W4,W4star,b4,b4star,u4,beta3,t);

% update a3
a3star = updatea_2(a2,a3,y_one_hot,W3,V,b3,c,u3,zeros(dL,1),alpha4,gamma3,gammaL,indicator);
a3star = updatea_2(a2,a3,y_one_hot,W3,V,b3,c,u3,zeros(dL,1),alpha2,gamma3,gammaL,indicator);
% a3star = updatea_2(a2,a3,a4,W3,W4,b3,b4,u3,zeros(d4,1),alpha4,gamma3,gamma4,indicator);
% adaptive momentum and update
[a3,beta4] = Adaptivea1_3(gamma3,gammaL,y_one_hot,a2,a3,a3star,W3,V,b3+u3,c,beta4,t);
[a3,beta2] = Adaptivea1_3(gamma3,gammaL,y_one_hot,a2,a3,a3star,W3,V,b3+u3,c,beta2,t);
% [a3,beta4] = Adaptivea1_3(gamma3,gamma4,a4,a2,a3,a3star,W3,W4,b3+u3,b4,beta4,t);

% update u3
@@ -198,37 +202,37 @@

% update a2
% a2star = updatea_2(a1,a2,a3,W2,W3,b2,b3,u2,u3,alpha4,gamma2,gamma3,indicator);
a2star = updatea_2(a1,a2,a3,W2,W3,b2+x_train,b3,u2,u3,alpha6,gamma2,gamma3,indicator);
a2star = updatea_2(a1,a2,a3,W2,W3,b2+x_train,b3,u2,u3,alpha4,gamma2,gamma3,indicator);
% adaptive momentum and update
% [a2,beta4] = Adaptivea1_3(gamma2,gamma3,a3,a1,a2,a2star,W2,W3,b2,b3,beta4,t);
[a2,beta6] = Adaptivea1_3(gamma2,gamma3,a3,a1,a2,a2star,W2,W3,b2+x_train+u2,b3+u3,beta6,t);
[a2,beta4] = Adaptivea1_3(gamma2,gamma3,a3,a1,a2,a2star,W2,W3,b2+x_train+u2,b3+u3,beta4,t);

% update u2
% u2 = a2-W2*a1-b2;
u2 = a2-W2*a1-b2-x_train;

% update W2 and b2 (2nd layer)
% [W2star,b2star] = updateWb_2(a2,a1,u2,W2,b2,alpha5,gamma2,lambda);
[W2star,b2star] = updateWb_ResNet(x_train,a2,a1,u2,W2,b2,alpha7,gamma2,lambda);
[W2star,b2star] = updateWb_ResNet(x_train,a2,a1,u2,W2,b2,alpha5,gamma2,lambda);
% adaptive momentum and update
[W2,b2,beta7] = AdaptiveWb_ResNet(lambda,gamma2,x_train,a1,a2-u2,W2,W2star,b2,b2star,beta7,t);
[W2,b2,beta5] = AdaptiveWb_ResNet(lambda,gamma2,x_train,a1,a2-u2,W2,W2star,b2,b2star,beta5,t);


% update a1
% a1star = updatea_2(x_train,a1,a2,W1,W2,b1,b2,u1,u2,alpha6,gamma1,gamma4,indicator);
a1star = updatea_2(x_train,a1,a2,W1,W2,b1,b2+x_train,u1,u2,alpha8,gamma1,gamma4,indicator);
a1star = updatea_2(x_train,a1,a2,W1,W2,b1,b2+x_train,u1,u2,alpha6,gamma1,gamma4,indicator);
% adaptive momentum and update
% [a1,beta6] = Adaptivea1_3(gamma1,gamma4,a2,x_train,a1,a1star,W1,W2,b1,b2,beta6,t);
[a1,beta8] = Adaptivea1_3(gamma1,gamma4,a2,x_train,a1,a1star,W1,W2,b1+u1,b2+x_train+u2,beta8,t);
[a1,beta6] = Adaptivea1_3(gamma1,gamma4,a2,x_train,a1,a1star,W1,W2,b1+u1,b2+x_train+u2,beta6,t);

% update u1
u1 = a1-W1*x_train-b1;

% update W1 and b1 (1st layer)
% [W1star,b1star] = updateWb(a1,x_train,W1,b1,alpha7,gamma1,lambda);
[W1star,b1star] = updateWb_2(a1,x_train,u1,W1,b1,alpha9,gamma1,lambda);
[W1star,b1star] = updateWb_2(a1,x_train,u1,W1,b1,alpha7,gamma1,lambda);
% adaptive momentum and update
[W1,b1,beta9] = AdaptiveWb1_4(lambda,gamma1,x_train,a1,W1,W1star,b1,b1star,u1,beta9,t);
[W1,b1,beta7] = AdaptiveWb1_4(lambda,gamma1,x_train,a1,W1,W1star,b1,b1star,u1,beta7,t);
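
The residual variant differs from the plain MLP only in the skip connection on the second hidden layer, visible in the u2 update (u2 = a2 - W2*a1 - b2 - x_train) and in the b2 + x_train shifts above. A minimal sketch of the corresponding forward pass, assuming the activation is applied after the skip addition as the penalty structure suggests (the helper name resnet_forward is illustrative, and the skip requires d2 to equal the input dimension):

function a3 = resnet_forward(x, W1, b1, W2, b2, W3, b3)
% Forward pass with a skip connection from the input into the 2nd hidden layer.
a1 = max(W1*x  + b1, 0);
a2 = max(W2*a1 + b2 + x, 0);   % residual / skip connection
a3 = max(W3*a2 + b3, 0);
end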



@@ -325,7 +329,7 @@
figure;
graph2 = semilogy(1:niter,accuracy_train,1:niter,accuracy_test);
set(graph2,'LineWidth',1.5);
legend('Training accuracy','Validation accuracy');
legend('Training accuracy','Test accuracy','Location','southeast');
ylabel('Accuracy')
xlabel('Epochs')
title('Three-layer Fully-connected Network (2nd ResNet Hidden Layer)')
