refac(train): rm redundant array allocations
rouson committed Jul 14, 2024
1 parent f3ea4a8 commit 578d2b3
Showing 2 changed files with 124 additions and 122 deletions.
3 changes: 3 additions & 0 deletions src/inference_engine/trainable_engine_m.F90
@@ -26,6 +26,9 @@ module trainable_engine_m
     real(rkind), allocatable :: b(:,:) ! biases
     integer, allocatable :: n(:) ! nodes per layer
     class(differentiable_activation_strategy_t), allocatable :: differentiable_activation_strategy_
+    real(rkind), allocatable, dimension(:,:) :: a
+    real(rkind), allocatable, dimension(:,:,:) :: dcdw, vdw, sdw, vdwc, sdwc
+    real(rkind), allocatable, dimension(:,:) :: z, delta, dcdb, vdb, sdb, vdbc, sdbc
   contains
     procedure :: assert_consistent
     procedure :: train
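The additions above are the heart of the commit: the training workspace moves from procedure-local arrays into components of the trainable engine, so one allocation can serve every call to train. A minimal sketch of the idiom under hypothetical names (workspace_m, workspace_t, compute, and scratch are illustrative, not code from this repository):

! Allocate-once workspace sketch: the component is allocated on the first
! call, guarded by allocated(), and reused on every later call.
module workspace_m
  implicit none

  type workspace_t
    real, allocatable :: scratch(:,:) ! persists between calls
  contains
    procedure :: compute
  end type

contains

  subroutine compute(self, w)
    class(workspace_t), intent(inout) :: self
    real, intent(in) :: w(:,:)
    if (.not. allocated(self%scratch)) allocate(self%scratch, mold=w) ! first call only
    self%scratch = 2.*w ! later calls skip straight to the work
  end subroutine

end module

The mold= specifier copies only the type and shape of its source, never its values, which is why the diff below can allocate each gradient and moment array directly from self%w or self%b.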
243 changes: 121 additions & 122 deletions src/inference_engine/trainable_engine_s.F90
@@ -112,148 +112,147 @@
 
   module procedure train
     integer l, batch, mini_batch_size, pair
-    real(rkind), allocatable :: &
-      z(:,:), a(:,:), delta(:,:), dcdw(:,:,:), dcdb(:,:), vdw(:,:,:), sdw(:,:,:), vdb(:,:), sdb(:,:), vdwc(:,:,:), sdwc(:,:,:), &
-      vdbc(:,:), sdbc(:,:)
     type(tensor_t), allocatable :: inputs(:), expected_outputs(:)
-    real(rkind) eta, alpha
-
-    eta = learning_rate
-    alpha = learning_rate
 
     call self%assert_consistent
 
+    if (.not. allocated(self%dcdw)) allocate(self%dcdw, mold=self%w) ! Gradient of cost function with respect to weights
+    if (.not. allocated(self%vdw)) allocate(self%vdw, mold=self%w)
+    if (.not. allocated(self%sdw)) allocate(self%sdw, mold=self%w)
+    if (.not. allocated(self%vdwc)) allocate(self%vdwc, mold=self%w)
+    if (.not. allocated(self%sdwc)) allocate(self%sdwc, mold=self%w)
+
+    if (.not. allocated(self%z)) allocate(self%z, mold=self%b) ! z-values: Sum z_j^l = w_jk^{l} a_k^{l-1} + b_j^l
+    if (.not. allocated(self%delta)) allocate(self%delta, mold=self%b)
+    if (.not. allocated(self%dcdb)) allocate(self%dcdb, mold=self%b) ! Gradient of cost function with respect to biases
+    if (.not. allocated(self%vdb)) allocate(self%vdb, mold=self%b)
+    if (.not. allocated(self%sdb)) allocate(self%sdb, mold=self%b)
+    if (.not. allocated(self%vdbc)) allocate(self%vdbc, mold=self%b)
+    if (.not. allocated(self%sdbc)) allocate(self%sdbc, mold=self%b)
+
     associate(output_layer => ubound(self%n,1))
 
-      allocate(a(maxval(self%n), input_layer:output_layer)) ! Activations
-
-      allocate(dcdw, mold=self%w) ! Gradient of cost function with respect to weights
-      allocate(vdw, mold=self%w)
-      allocate(sdw, mold=self%w)
-      allocate(vdwc, mold=self%w)
-      allocate(sdwc, mold=self%w)
-
-      allocate(z, mold=self%b) ! z-values: Sum z_j^l = w_jk^{l} a_k^{l-1} + b_j^l
-      allocate(delta, mold=self%b)
-      allocate(dcdb, mold=self%b) ! Gradient of cost function with respect to biases
-      allocate(vdb, mold=self%b)
-      allocate(sdb, mold=self%b)
-      allocate(vdbc, mold=self%b)
-      allocate(sdbc, mold=self%b)
-
-      vdw = 0.d0
-      sdw = 1.d0
-      vdb = 0.d0
-      sdb = 1.d0
-
-      associate(w => self%w, b => self%b, n => self%n, num_mini_batches => size(mini_batches_arr))
-
-        if (present(cost)) allocate(cost(num_mini_batches))
-
-        iterate_across_batches: &
-        do batch = 1, num_mini_batches
-
-          if (present(cost)) cost(batch) = 0.
-          dcdw = 0.; dcdb = 0.
+      if (.not. allocated(self%a)) allocate(self%a(maxval(self%n), input_layer:output_layer)) ! Activations
+
+      associate( &
+        a => self%a, dcdw => self%dcdw, vdw => self%vdw, sdw => self%sdw, vdwc => self%vdwc, sdwc => self%sdwc, &
+        z => self%z, delta => self%delta, dcdb => self%dcdb, vdb => self%vdb, sdb => self%sdb, vdbc => self%vdbc, sdbc => self%sdbc &
+      )
+        vdw = 0.d0
+        sdw = 1.d0
+        vdb = 0.d0
+        sdb = 1.d0
+
+        associate(w => self%w, b => self%b, n => self%n, num_mini_batches => size(mini_batches_arr))
+
+          if (present(cost)) allocate(cost(num_mini_batches))
+
+          iterate_across_batches: &
+          do batch = 1, num_mini_batches
+
+            if (present(cost)) cost(batch) = 0.
+            dcdw = 0.; dcdb = 0.
 
 #ifndef _CRAYFTN
-          associate(input_output_pairs => mini_batches_arr(batch)%input_output_pairs())
+            associate(input_output_pairs => mini_batches_arr(batch)%input_output_pairs())
 #else
-          block
-            type(input_output_pair_t), allocatable :: input_output_pairs(:)
-            input_output_pairs = mini_batches_arr(batch)%input_output_pairs()
+            block
+              type(input_output_pair_t), allocatable :: input_output_pairs(:)
+              input_output_pairs = mini_batches_arr(batch)%input_output_pairs()
 #endif
-          inputs = input_output_pairs%inputs()
-          expected_outputs = input_output_pairs%expected_outputs()
-          mini_batch_size = size(input_output_pairs)
+            inputs = input_output_pairs%inputs()
+            expected_outputs = input_output_pairs%expected_outputs()
+            mini_batch_size = size(input_output_pairs)
 #ifndef _CRAYFTN
-          end associate
+            end associate
 #else
-          end block
+            end block
 #endif
 
-          iterate_through_batch: &
-          do pair = 1, mini_batch_size
-
-            a(1:self%num_inputs(), input_layer) = inputs(pair)%values()
-
-            feed_forward: &
-            do l = 1,output_layer
-              z(1:n(l),l) = matmul(w(1:n(l),1:n(l-1),l), a(1:n(l-1),l-1)) + b(1:n(l),l)
-              a(1:n(l),l) = self%differentiable_activation_strategy_%activation(z(1:n(l),l))
-            end do feed_forward
-
-            associate(y => expected_outputs(pair)%values())
-              if (present(cost)) &
-                cost(batch) = cost(batch) + sum((y(1:n(output_layer))-a(1:n(output_layer),output_layer))**2)/(2.e0*mini_batch_size)
-
-              delta(1:n(output_layer),output_layer) = &
-                (a(1:n(output_layer),output_layer) - y(1:n(output_layer))) &
-                * self%differentiable_activation_strategy_%activation_derivative(z(1:n(output_layer),output_layer))
-            end associate
-
-            associate(n_hidden => self%num_layers()-2)
-              back_propagate_error: &
-              do l = n_hidden,1,-1
-                delta(1:n(l),l) = matmul(transpose(w(1:n(l+1),1:n(l),l+1)), delta(1:n(l+1),l+1)) &
-                  * self%differentiable_activation_strategy_%activation_derivative(z(1:n(l),l))
-              end do back_propagate_error
-            end associate
-
-            block
-              integer j
-
-              sum_gradients: &
-              do l = 1,output_layer
-                dcdb(1:n(l),l) = dcdb(1:n(l),l) + delta(1:n(l),l)
-                do concurrent(j = 1:n(l))
-                  dcdw(j,1:n(l-1),l) = dcdw(j,1:n(l-1),l) + a(1:n(l-1),l-1)*delta(j,l)
-                end do
-              end do sum_gradients
-            end block
-
-          end do iterate_through_batch
-
-          if (adam) then
-            block
-              ! Adam parameters
-              real, parameter :: beta(*) = [.9_rkind, .999_rkind]
-              real, parameter :: obeta(*) = [1._rkind - beta(1), 1._rkind - beta(2)]
-              real, parameter :: epsilon = real(1.D-08,rkind)
-
-              adam_adjust_weights_and_biases: &
-              do concurrent(l = 1:output_layer)
-                dcdw(1:n(l),1:n(l-1),l) = dcdw(1:n(l),1:n(l-1),l)/(mini_batch_size)
-                vdw(1:n(l),1:n(l-1),l) = beta(1)*vdw(1:n(l),1:n(l-1),l) + obeta(1)*dcdw(1:n(l),1:n(l-1),l)
-                sdw(1:n(l),1:n(l-1),l) = beta(2)*sdw(1:n(l),1:n(l-1),l) + obeta(2)*(dcdw(1:n(l),1:n(l-1),l)**2)
-                vdwc(1:n(l),1:n(l-1),l) = vdw(1:n(l),1:n(l-1),l)/(1._rkind - beta(1)**num_mini_batches)
-                sdwc(1:n(l),1:n(l-1),l) = sdw(1:n(l),1:n(l-1),l)/(1._rkind - beta(2)**num_mini_batches)
-                w(1:n(l),1:n(l-1),l) = w(1:n(l),1:n(l-1),l) &
-                  - alpha*vdwc(1:n(l),1:n(l-1),l)/(sqrt(sdwc(1:n(l),1:n(l-1),l))+epsilon) ! Adjust weights
-
-                dcdb(1:n(l),l) = dcdb(1:n(l),l)/mini_batch_size
-                vdb(1:n(l),l) = beta(1)*vdb(1:n(l),l) + obeta(1)*dcdb(1:n(l),l)
-                sdb(1:n(l),l) = beta(2)*sdb(1:n(l),l) + obeta(2)*(dcdb(1:n(l),l)**2)
-                vdbc(1:n(l),l) = vdb(1:n(l),l)/(1._rkind - beta(1)**num_mini_batches)
-                sdbc(1:n(l),l) = sdb(1:n(l),l)/(1._rkind - beta(2)**num_mini_batches)
-                b(1:n(l),l) = b(1:n(l),l) - alpha*vdbc(1:n(l),l)/(sqrt(sdbc(1:n(l),l))+epsilon) ! Adjust biases
-              end do adam_adjust_weights_and_biases
-            end block
-          else
-            adjust_weights_and_biases: &
-            do concurrent(l = 1:output_layer)
-              dcdb(1:n(l),l) = dcdb(1:n(l),l)/mini_batch_size
-              b(1:n(l),l) = b(1:n(l),l) - eta*dcdb(1:n(l),l) ! Adjust biases
-              dcdw(1:n(l),1:n(l-1),l) = dcdw(1:n(l),1:n(l-1),l)/mini_batch_size
-              w(1:n(l),1:n(l-1),l) = w(1:n(l),1:n(l-1),l) - eta*dcdw(1:n(l),1:n(l-1),l) ! Adjust weights
-            end do adjust_weights_and_biases
-          end if
-
-        end do iterate_across_batches
-
+            iterate_through_batch: &
+            do pair = 1, mini_batch_size
+
+              a(1:self%num_inputs(), input_layer) = inputs(pair)%values()
+
+              feed_forward: &
+              do l = 1,output_layer
+                z(1:n(l),l) = matmul(w(1:n(l),1:n(l-1),l), a(1:n(l-1),l-1)) + b(1:n(l),l)
+                a(1:n(l),l) = self%differentiable_activation_strategy_%activation(z(1:n(l),l))
+              end do feed_forward
+
+              associate(y => expected_outputs(pair)%values())
+                if (present(cost)) &
+                  cost(batch) = cost(batch) + sum((y(1:n(output_layer))-a(1:n(output_layer),output_layer))**2)/(2.e0*mini_batch_size)
+
+                delta(1:n(output_layer),output_layer) = &
+                  (a(1:n(output_layer),output_layer) - y(1:n(output_layer))) &
+                  * self%differentiable_activation_strategy_%activation_derivative(z(1:n(output_layer),output_layer))
+              end associate
+
+              associate(n_hidden => self%num_layers()-2)
+                back_propagate_error: &
+                do l = n_hidden,1,-1
+                  delta(1:n(l),l) = matmul(transpose(w(1:n(l+1),1:n(l),l+1)), delta(1:n(l+1),l+1)) &
+                    * self%differentiable_activation_strategy_%activation_derivative(z(1:n(l),l))
+                end do back_propagate_error
+              end associate
+
+              block
+                integer j
+
+                sum_gradients: &
+                do l = 1,output_layer
+                  dcdb(1:n(l),l) = dcdb(1:n(l),l) + delta(1:n(l),l)
+                  do concurrent(j = 1:n(l))
+                    dcdw(j,1:n(l-1),l) = dcdw(j,1:n(l-1),l) + a(1:n(l-1),l-1)*delta(j,l)
+                  end do
+                end do sum_gradients
+              end block
+
+            end do iterate_through_batch
+
+            if (adam) then
+              block
+                ! Adam parameters
+                real, parameter :: beta(*) = [.9_rkind, .999_rkind]
+                real, parameter :: obeta(*) = [1._rkind - beta(1), 1._rkind - beta(2)]
+                real, parameter :: epsilon = real(1.D-08,rkind)
+
+                associate(alpha => learning_rate)
+                  adam_adjust_weights_and_biases: &
+                  do concurrent(l = 1:output_layer)
+                    dcdw(1:n(l),1:n(l-1),l) = dcdw(1:n(l),1:n(l-1),l)/(mini_batch_size)
+                    vdw(1:n(l),1:n(l-1),l) = beta(1)*vdw(1:n(l),1:n(l-1),l) + obeta(1)*dcdw(1:n(l),1:n(l-1),l)
+                    sdw(1:n(l),1:n(l-1),l) = beta(2)*sdw(1:n(l),1:n(l-1),l) + obeta(2)*(dcdw(1:n(l),1:n(l-1),l)**2)
+                    vdwc(1:n(l),1:n(l-1),l) = vdw(1:n(l),1:n(l-1),l)/(1._rkind - beta(1)**num_mini_batches)
+                    sdwc(1:n(l),1:n(l-1),l) = sdw(1:n(l),1:n(l-1),l)/(1._rkind - beta(2)**num_mini_batches)
+                    w(1:n(l),1:n(l-1),l) = w(1:n(l),1:n(l-1),l) &
+                      - alpha*vdwc(1:n(l),1:n(l-1),l)/(sqrt(sdwc(1:n(l),1:n(l-1),l))+epsilon) ! Adjust weights
+
+                    dcdb(1:n(l),l) = dcdb(1:n(l),l)/mini_batch_size
+                    vdb(1:n(l),l) = beta(1)*vdb(1:n(l),l) + obeta(1)*dcdb(1:n(l),l)
+                    sdb(1:n(l),l) = beta(2)*sdb(1:n(l),l) + obeta(2)*(dcdb(1:n(l),l)**2)
+                    vdbc(1:n(l),l) = vdb(1:n(l),l)/(1._rkind - beta(1)**num_mini_batches)
+                    sdbc(1:n(l),l) = sdb(1:n(l),l)/(1._rkind - beta(2)**num_mini_batches)
+                    b(1:n(l),l) = b(1:n(l),l) - alpha*vdbc(1:n(l),l)/(sqrt(sdbc(1:n(l),l))+epsilon) ! Adjust biases
+                  end do adam_adjust_weights_and_biases
+                end associate
+              end block
+            else
+              associate(eta => learning_rate)
+                adjust_weights_and_biases: &
+                do concurrent(l = 1:output_layer)
+                  dcdb(1:n(l),l) = dcdb(1:n(l),l)/mini_batch_size
+                  b(1:n(l),l) = b(1:n(l),l) - eta*dcdb(1:n(l),l) ! Adjust biases
+                  dcdw(1:n(l),1:n(l-1),l) = dcdw(1:n(l),1:n(l-1),l)/mini_batch_size
+                  w(1:n(l),1:n(l-1),l) = w(1:n(l),1:n(l-1),l) - eta*dcdw(1:n(l),1:n(l-1),l) ! Adjust weights
+                end do adjust_weights_and_biases
+              end associate
+            end if
+          end do iterate_across_batches
+        end associate
       end associate
     end associate
 
   end procedure
 
 #ifdef __INTEL_COMPILER
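For readers tracking the algorithm rather than the refactoring: the feed_forward, back_propagate_error, and sum_gradients constructs above encode the standard backpropagation relations for the half-sum-of-squares cost C accumulated in cost(batch), with sigma the activation function (in LaTeX notation):

z^l_j = \sum_k w^l_{jk}\, a^{l-1}_k + b^l_j, \qquad a^l_j = \sigma(z^l_j)

\delta^L = (a^L - y) \odot \sigma'(z^L), \qquad
\delta^l = \big[ (W^{l+1})^{\mathsf{T}} \delta^{l+1} \big] \odot \sigma'(z^l)

\frac{\partial C}{\partial b^l_j} = \delta^l_j, \qquad
\frac{\partial C}{\partial w^l_{jk}} = a^{l-1}_k\, \delta^l_j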
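Similarly, adam_adjust_weights_and_biases applies the usual Adam update to each weight and bias \theta, where g is the mini-batch-averaged gradient, \beta = (0.9, 0.999), \epsilon = 10^{-8}, \alpha is the learning rate, and the bias-correction exponent t is fixed at num_mini_batches in this implementation:

v \leftarrow \beta_1 v + (1-\beta_1)\, g, \qquad
s \leftarrow \beta_2 s + (1-\beta_2)\, g^2

\hat{v} = \frac{v}{1-\beta_1^{\,t}}, \qquad
\hat{s} = \frac{s}{1-\beta_2^{\,t}}, \qquad
\theta \leftarrow \theta - \alpha\, \frac{\hat{v}}{\sqrt{\hat{s}} + \epsilon}

The non-Adam branch reduces to plain stochastic gradient descent, \theta \leftarrow \theta - \eta\, g, with \eta also bound to learning_rate.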
