refac(train): rm redundant array allocations
rouson committed Jul 14, 2024
1 parent f3ea4a8 commit 578d2b3
Showing 2 changed files with 124 additions and 122 deletions.
3 changes: 3 additions & 0 deletions src/inference_engine/trainable_engine_m.F90
@@ -26,6 +26,9 @@ module trainable_engine_m
     real(rkind), allocatable :: b(:,:) ! biases
     integer, allocatable :: n(:) ! nodes per layer
     class(differentiable_activation_strategy_t), allocatable :: differentiable_activation_strategy_
+    real(rkind), allocatable, dimension(:,:) :: a
+    real(rkind), allocatable, dimension(:,:,:) :: dcdw, vdw, sdw, vdwc, sdwc
+    real(rkind), allocatable, dimension(:,:) :: z, delta, dcdb, vdb, sdb, vdbc, sdbc
   contains
     procedure :: assert_consistent
     procedure :: train
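The additions above are the heart of the commit: the training workspace moves from procedure-local arrays into components of the trainable engine, so one allocation can serve every call to train. A minimal sketch of the idiom under hypothetical names (workspace_m, workspace_t, compute, and scratch are illustrative, not code from this repository):

! Allocate-once workspace sketch: the component is allocated on the first
! call, guarded by allocated(), and reused on every later call.
module workspace_m
  implicit none

  type workspace_t
    real, allocatable :: scratch(:,:) ! persists between calls
  contains
    procedure :: compute
  end type

contains

  subroutine compute(self, w)
    class(workspace_t), intent(inout) :: self
    real, intent(in) :: w(:,:)
    if (.not. allocated(self%scratch)) allocate(self%scratch, mold=w) ! first call only
    self%scratch = 2.*w ! later calls skip straight to the work
  end subroutine

end module

The mold= specifier copies only the type and shape of its source, never its values, which is why the diff below can allocate each gradient and moment array directly from self%w or self%b.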
243 changes: 121 additions & 122 deletions src/inference_engine/trainable_engine_s.F90
@@ -112,148 +112,147 @@
 
   module procedure train
     integer l, batch, mini_batch_size, pair
-    real(rkind), allocatable :: &
-      z(:,:), a(:,:), delta(:,:), dcdw(:,:,:), dcdb(:,:), vdw(:,:,:), sdw(:,:,:), vdb(:,:), sdb(:,:), vdwc(:,:,:), sdwc(:,:,:), &
-      vdbc(:,:), sdbc(:,:)
     type(tensor_t), allocatable :: inputs(:), expected_outputs(:)
-    real(rkind) eta, alpha
-
-    eta = learning_rate
-    alpha = learning_rate
 
     call self%assert_consistent
 
+    if (.not. allocated(self%dcdw)) allocate(self%dcdw, mold=self%w) ! Gradient of cost function with respect to weights
+    if (.not. allocated(self%vdw)) allocate(self%vdw, mold=self%w)
+    if (.not. allocated(self%sdw)) allocate(self%sdw, mold=self%w)
+    if (.not. allocated(self%vdwc)) allocate(self%vdwc, mold=self%w)
+    if (.not. allocated(self%sdwc)) allocate(self%sdwc, mold=self%w)
+
+    if (.not. allocated(self%z)) allocate(self%z, mold=self%b) ! z-values: Sum z_j^l = w_jk^{l} a_k^{l-1} + b_j^l
+    if (.not. allocated(self%delta)) allocate(self%delta, mold=self%b)
+    if (.not. allocated(self%dcdb)) allocate(self%dcdb, mold=self%b) ! Gradient of cost function with respect to biases
+    if (.not. allocated(self%vdb)) allocate(self%vdb, mold=self%b)
+    if (.not. allocated(self%sdb)) allocate(self%sdb, mold=self%b)
+    if (.not. allocated(self%vdbc)) allocate(self%vdbc, mold=self%b)
+    if (.not. allocated(self%sdbc)) allocate(self%sdbc, mold=self%b)
+
     associate(output_layer => ubound(self%n,1))
 
-      allocate(a(maxval(self%n), input_layer:output_layer)) ! Activations
-
-      allocate(dcdw, mold=self%w) ! Gradient of cost function with respect to weights
-      allocate(vdw, mold=self%w)
-      allocate(sdw, mold=self%w)
-      allocate(vdwc, mold=self%w)
-      allocate(sdwc, mold=self%w)
-
-      allocate(z, mold=self%b) ! z-values: Sum z_j^l = w_jk^{l} a_k^{l-1} + b_j^l
-      allocate(delta, mold=self%b)
-      allocate(dcdb, mold=self%b) ! Gradient of cost function with respect to biases
-      allocate(vdb, mold=self%b)
-      allocate(sdb, mold=self%b)
-      allocate(vdbc, mold=self%b)
-      allocate(sdbc, mold=self%b)
-
-      vdw = 0.d0
-      sdw = 1.d0
-      vdb = 0.d0
-      sdb = 1.d0
-
-      associate(w => self%w, b => self%b, n => self%n, num_mini_batches => size(mini_batches_arr))
-
-        if (present(cost)) allocate(cost(num_mini_batches))
-
-        iterate_across_batches: &
-        do batch = 1, num_mini_batches
-
-          if (present(cost)) cost(batch) = 0.
-          dcdw = 0.; dcdb = 0.
+      if (.not. allocated(self%a)) allocate(self%a(maxval(self%n), input_layer:output_layer)) ! Activations
+
+      associate( &
+        a => self%a, dcdw => self%dcdw, vdw => self%vdw, sdw => self%sdw, vdwc => self%vdwc, sdwc => self%sdwc, &
+        z => self%z, delta => self%delta, dcdb => self%dcdb, vdb => self%vdb, sdb => self%sdb, vdbc => self%vdbc, sdbc => self%sdbc &
+      )
+        vdw = 0.d0
+        sdw = 1.d0
+        vdb = 0.d0
+        sdb = 1.d0
+
+        associate(w => self%w, b => self%b, n => self%n, num_mini_batches => size(mini_batches_arr))
+
+          if (present(cost)) allocate(cost(num_mini_batches))
+
+          iterate_across_batches: &
+          do batch = 1, num_mini_batches
+
+            if (present(cost)) cost(batch) = 0.
+            dcdw = 0.; dcdb = 0.
 
 #ifndef _CRAYFTN
-          associate(input_output_pairs => mini_batches_arr(batch)%input_output_pairs())
+            associate(input_output_pairs => mini_batches_arr(batch)%input_output_pairs())
 #else
-          block
-            type(input_output_pair_t), allocatable :: input_output_pairs(:)
-            input_output_pairs = mini_batches_arr(batch)%input_output_pairs()
+            block
+              type(input_output_pair_t), allocatable :: input_output_pairs(:)
+              input_output_pairs = mini_batches_arr(batch)%input_output_pairs()
 #endif
-          inputs = input_output_pairs%inputs()
-          expected_outputs = input_output_pairs%expected_outputs()
-          mini_batch_size = size(input_output_pairs)
+            inputs = input_output_pairs%inputs()
+            expected_outputs = input_output_pairs%expected_outputs()
+            mini_batch_size = size(input_output_pairs)
 #ifndef _CRAYFTN
-          end associate
+            end associate
 #else
-          end block
+            end block
 #endif
 
-          iterate_through_batch: &
-          do pair = 1, mini_batch_size
-
-            a(1:self%num_inputs(), input_layer) = inputs(pair)%values()
-
-            feed_forward: &
-            do l = 1,output_layer
-              z(1:n(l),l) = matmul(w(1:n(l),1:n(l-1),l), a(1:n(l-1),l-1)) + b(1:n(l),l)
-              a(1:n(l),l) = self%differentiable_activation_strategy_%activation(z(1:n(l),l))
-            end do feed_forward
-
-            associate(y => expected_outputs(pair)%values())
-              if (present(cost)) &
-                cost(batch) = cost(batch) + sum((y(1:n(output_layer))-a(1:n(output_layer),output_layer))**2)/(2.e0*mini_batch_size)
-
-              delta(1:n(output_layer),output_layer) = &
-                (a(1:n(output_layer),output_layer) - y(1:n(output_layer))) &
-                * self%differentiable_activation_strategy_%activation_derivative(z(1:n(output_layer),output_layer))
-            end associate
-
-            associate(n_hidden => self%num_layers()-2)
-              back_propagate_error: &
-              do l = n_hidden,1,-1
-                delta(1:n(l),l) = matmul(transpose(w(1:n(l+1),1:n(l),l+1)), delta(1:n(l+1),l+1)) &
-                  * self%differentiable_activation_strategy_%activation_derivative(z(1:n(l),l))
-              end do back_propagate_error
-            end associate
-
-            block
-              integer j
-
-              sum_gradients: &
-              do l = 1,output_layer
-                dcdb(1:n(l),l) = dcdb(1:n(l),l) + delta(1:n(l),l)
-                do concurrent(j = 1:n(l))
-                  dcdw(j,1:n(l-1),l) = dcdw(j,1:n(l-1),l) + a(1:n(l-1),l-1)*delta(j,l)
-                end do
-              end do sum_gradients
-            end block
-
-          end do iterate_through_batch
-
-          if (adam) then
-            block
-              ! Adam parameters
-              real, parameter :: beta(*) = [.9_rkind, .999_rkind]
-              real, parameter :: obeta(*) = [1._rkind - beta(1), 1._rkind - beta(2)]
-              real, parameter :: epsilon = real(1.D-08,rkind)
-
-              adam_adjust_weights_and_biases: &
-              do concurrent(l = 1:output_layer)
-                dcdw(1:n(l),1:n(l-1),l) = dcdw(1:n(l),1:n(l-1),l)/(mini_batch_size)
-                vdw(1:n(l),1:n(l-1),l) = beta(1)*vdw(1:n(l),1:n(l-1),l) + obeta(1)*dcdw(1:n(l),1:n(l-1),l)
-                sdw(1:n(l),1:n(l-1),l) = beta(2)*sdw(1:n(l),1:n(l-1),l) + obeta(2)*(dcdw(1:n(l),1:n(l-1),l)**2)
-                vdwc(1:n(l),1:n(l-1),l) = vdw(1:n(l),1:n(l-1),l)/(1._rkind - beta(1)**num_mini_batches)
-                sdwc(1:n(l),1:n(l-1),l) = sdw(1:n(l),1:n(l-1),l)/(1._rkind - beta(2)**num_mini_batches)
-                w(1:n(l),1:n(l-1),l) = w(1:n(l),1:n(l-1),l) &
-                  - alpha*vdwc(1:n(l),1:n(l-1),l)/(sqrt(sdwc(1:n(l),1:n(l-1),l))+epsilon) ! Adjust weights
-
-                dcdb(1:n(l),l) = dcdb(1:n(l),l)/mini_batch_size
-                vdb(1:n(l),l) = beta(1)*vdb(1:n(l),l) + obeta(1)*dcdb(1:n(l),l)
-                sdb(1:n(l),l) = beta(2)*sdb(1:n(l),l) + obeta(2)*(dcdb(1:n(l),l)**2)
-                vdbc(1:n(l),l) = vdb(1:n(l),l)/(1._rkind - beta(1)**num_mini_batches)
-                sdbc(1:n(l),l) = sdb(1:n(l),l)/(1._rkind - beta(2)**num_mini_batches)
-                b(1:n(l),l) = b(1:n(l),l) - alpha*vdbc(1:n(l),l)/(sqrt(sdbc(1:n(l),l))+epsilon) ! Adjust biases
-              end do adam_adjust_weights_and_biases
-            end block
-          else
-            adjust_weights_and_biases: &
-            do concurrent(l = 1:output_layer)
-              dcdb(1:n(l),l) = dcdb(1:n(l),l)/mini_batch_size
-              b(1:n(l),l) = b(1:n(l),l) - eta*dcdb(1:n(l),l) ! Adjust biases
-              dcdw(1:n(l),1:n(l-1),l) = dcdw(1:n(l),1:n(l-1),l)/mini_batch_size
-              w(1:n(l),1:n(l-1),l) = w(1:n(l),1:n(l-1),l) - eta*dcdw(1:n(l),1:n(l-1),l) ! Adjust weights
-            end do adjust_weights_and_biases
-          end if
-
-        end do iterate_across_batches
-
+            iterate_through_batch: &
+            do pair = 1, mini_batch_size
+
+              a(1:self%num_inputs(), input_layer) = inputs(pair)%values()
+
+              feed_forward: &
+              do l = 1,output_layer
+                z(1:n(l),l) = matmul(w(1:n(l),1:n(l-1),l), a(1:n(l-1),l-1)) + b(1:n(l),l)
+                a(1:n(l),l) = self%differentiable_activation_strategy_%activation(z(1:n(l),l))
+              end do feed_forward
+
+              associate(y => expected_outputs(pair)%values())
+                if (present(cost)) &
+                  cost(batch) = cost(batch) + sum((y(1:n(output_layer))-a(1:n(output_layer),output_layer))**2)/(2.e0*mini_batch_size)
+
+                delta(1:n(output_layer),output_layer) = &
+                  (a(1:n(output_layer),output_layer) - y(1:n(output_layer))) &
+                  * self%differentiable_activation_strategy_%activation_derivative(z(1:n(output_layer),output_layer))
+              end associate
+
+              associate(n_hidden => self%num_layers()-2)
+                back_propagate_error: &
+                do l = n_hidden,1,-1
+                  delta(1:n(l),l) = matmul(transpose(w(1:n(l+1),1:n(l),l+1)), delta(1:n(l+1),l+1)) &
+                    * self%differentiable_activation_strategy_%activation_derivative(z(1:n(l),l))
+                end do back_propagate_error
+              end associate
+
+              block
+                integer j
+
+                sum_gradients: &
+                do l = 1,output_layer
+                  dcdb(1:n(l),l) = dcdb(1:n(l),l) + delta(1:n(l),l)
+                  do concurrent(j = 1:n(l))
+                    dcdw(j,1:n(l-1),l) = dcdw(j,1:n(l-1),l) + a(1:n(l-1),l-1)*delta(j,l)
+                  end do
+                end do sum_gradients
+              end block
+
+            end do iterate_through_batch
+
+            if (adam) then
+              block
+                ! Adam parameters
+                real, parameter :: beta(*) = [.9_rkind, .999_rkind]
+                real, parameter :: obeta(*) = [1._rkind - beta(1), 1._rkind - beta(2)]
+                real, parameter :: epsilon = real(1.D-08,rkind)
+
+                associate(alpha => learning_rate)
+                  adam_adjust_weights_and_biases: &
+                  do concurrent(l = 1:output_layer)
+                    dcdw(1:n(l),1:n(l-1),l) = dcdw(1:n(l),1:n(l-1),l)/(mini_batch_size)
+                    vdw(1:n(l),1:n(l-1),l) = beta(1)*vdw(1:n(l),1:n(l-1),l) + obeta(1)*dcdw(1:n(l),1:n(l-1),l)
+                    sdw(1:n(l),1:n(l-1),l) = beta(2)*sdw(1:n(l),1:n(l-1),l) + obeta(2)*(dcdw(1:n(l),1:n(l-1),l)**2)
+                    vdwc(1:n(l),1:n(l-1),l) = vdw(1:n(l),1:n(l-1),l)/(1._rkind - beta(1)**num_mini_batches)
+                    sdwc(1:n(l),1:n(l-1),l) = sdw(1:n(l),1:n(l-1),l)/(1._rkind - beta(2)**num_mini_batches)
+                    w(1:n(l),1:n(l-1),l) = w(1:n(l),1:n(l-1),l) &
+                      - alpha*vdwc(1:n(l),1:n(l-1),l)/(sqrt(sdwc(1:n(l),1:n(l-1),l))+epsilon) ! Adjust weights
+
+                    dcdb(1:n(l),l) = dcdb(1:n(l),l)/mini_batch_size
+                    vdb(1:n(l),l) = beta(1)*vdb(1:n(l),l) + obeta(1)*dcdb(1:n(l),l)
+                    sdb(1:n(l),l) = beta(2)*sdb(1:n(l),l) + obeta(2)*(dcdb(1:n(l),l)**2)
+                    vdbc(1:n(l),l) = vdb(1:n(l),l)/(1._rkind - beta(1)**num_mini_batches)
+                    sdbc(1:n(l),l) = sdb(1:n(l),l)/(1._rkind - beta(2)**num_mini_batches)
+                    b(1:n(l),l) = b(1:n(l),l) - alpha*vdbc(1:n(l),l)/(sqrt(sdbc(1:n(l),l))+epsilon) ! Adjust biases
+                  end do adam_adjust_weights_and_biases
+                end associate
+              end block
+            else
+              associate(eta => learning_rate)
+                adjust_weights_and_biases: &
+                do concurrent(l = 1:output_layer)
+                  dcdb(1:n(l),l) = dcdb(1:n(l),l)/mini_batch_size
+                  b(1:n(l),l) = b(1:n(l),l) - eta*dcdb(1:n(l),l) ! Adjust biases
+                  dcdw(1:n(l),1:n(l-1),l) = dcdw(1:n(l),1:n(l-1),l)/mini_batch_size
+                  w(1:n(l),1:n(l-1),l) = w(1:n(l),1:n(l-1),l) - eta*dcdw(1:n(l),1:n(l-1),l) ! Adjust weights
+                end do adjust_weights_and_biases
+              end associate
+            end if
+          end do iterate_across_batches
+        end associate
       end associate
     end associate
 
   end procedure
 
 #ifdef __INTEL_COMPILER
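For readers tracking the algorithm rather than the refactoring: the feed_forward, back_propagate_error, and sum_gradients constructs above encode the standard backpropagation relations for the half-sum-of-squares cost C accumulated in cost(batch), with sigma the activation function (in LaTeX notation):

z^l_j = \sum_k w^l_{jk}\, a^{l-1}_k + b^l_j, \qquad a^l_j = \sigma(z^l_j)

\delta^L = (a^L - y) \odot \sigma'(z^L), \qquad
\delta^l = \big[ (W^{l+1})^{\mathsf{T}} \delta^{l+1} \big] \odot \sigma'(z^l)

\frac{\partial C}{\partial b^l_j} = \delta^l_j, \qquad
\frac{\partial C}{\partial w^l_{jk}} = a^{l-1}_k\, \delta^l_j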
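Similarly, adam_adjust_weights_and_biases applies the usual Adam update to each weight and bias \theta, where g is the mini-batch-averaged gradient, \beta = (0.9, 0.999), \epsilon = 10^{-8}, \alpha is the learning rate, and the bias-correction exponent t is fixed at num_mini_batches in this implementation:

v \leftarrow \beta_1 v + (1-\beta_1)\, g, \qquad
s \leftarrow \beta_2 s + (1-\beta_2)\, g^2

\hat{v} = \frac{v}{1-\beta_1^{\,t}}, \qquad
\hat{s} = \frac{s}{1-\beta_2^{\,t}}, \qquad
\theta \leftarrow \theta - \alpha\, \frac{\hat{v}}{\sqrt{\hat{s}} + \epsilon}

The non-Adam branch reduces to plain stochastic gradient descent, \theta \leftarrow \theta - \eta\, g, with \eta also bound to learning_rate.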
