Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Train identity network from identity with 10% maximum random perturbations #86

Merged
merged 7 commits into from
Sep 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion app/train-cloud-microphysics.f90
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,6 @@ subroutine read_train_write
end associate
end associate


associate(num_pairs => size(input_output_pairs), n_bins => 1) ! also tried n_bins => size(input_output_pairs)/10000
bins = [(bin_t(num_items=num_pairs, num_bins=n_bins, bin_number=b), b = 1, n_bins)]

Expand Down
52 changes: 25 additions & 27 deletions example/write-read-infer.f90
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,12 @@ program write_read_infer
!! This program demonstrates how to write a neural network to a JSON file,
!! read the same network from the written file, query the network object for
!! some of its properties, print those properties, and use the network to
!! perform inference.
use command_line_m, only : command_line_t
use inference_engine_m, only : inference_engine_t
use string_m, only : string_t
use step_m, only : step_t
use file_m, only : file_t
!! perform inference. The network performs an identity mapping from any
!! non-negative inputs to the corresponding outputs using a RELU activation
!! function.
use inference_engine_m, only : inference_engine_t, relu_t, tensor_t
use sourcery_m, only : string_t, command_line_t, file_t
use kind_parameters_m, only : rkind
use tensor_m, only : tensor_t
implicit none

type(string_t) file_name
Expand All @@ -21,43 +19,41 @@ program write_read_infer

if (len(file_name%string())==0) then
error stop new_line('a') // new_line('a') // &
'Usage: ./build/run-fpm.sh run --example write-read-infer -- --output-file "<file-name>"'
'Usage: ./build/run-fpm.sh run --example identity -- --output-file "<file-name>"'
end if

call write_read_query_infer(file_name)

contains

function single_hidden_layer_xor_network() result(inference_engine)
function identity_network() result(inference_engine)
type(inference_engine_t) inference_engine
integer, parameter :: nodes_per_layer(*) = [2, 3, 1]
integer, parameter :: nodes_per_layer(*) = [2, 2, 2]
integer, parameter :: max_n = maxval(nodes_per_layer), layers = size(nodes_per_layer)

inference_engine = inference_engine_t( &
metadata = [string_t("XOR"), string_t("Damian Rouson"), string_t("2023-07-02"), string_t("step"), string_t("false")], &
weights = reshape([real(rkind):: [1,1,0, 0,1,1, 0,0,0], [1,0,0, -2,0,0, 1,0,0]], [max_n, max_n, layers-1]), &
biases = reshape([[0.,-1.99,0.], [0., 0., 0.]], [max_n, layers-1]), &
metadata = [string_t("Identity"), string_t("Damian Rouson"), string_t("2023-09-18"), string_t("relu"), string_t("false")], &
weights = reshape([real(rkind):: [1,0, 0,1], [1,0, 0,1]], [max_n, max_n, layers-1]), &
biases = reshape([real(rkind):: [0,0], [0,0]], [max_n, layers-1]), &
nodes = nodes_per_layer &
)

end function

subroutine write_read_query_infer(output_file_name)
type(string_t), intent(in) :: output_file_name
type(string_t) activation_name
integer i, j
integer, parameter :: num_neurons = 3, num_hidden_layers = 2
integer, parameter :: identity(*,*,*) = &
reshape([((merge(1,0,i==j), i=1,num_neurons), j=1,num_neurons)], shape=[num_neurons,num_neurons,num_hidden_layers-1])
type(inference_engine_t) xor_network, inference_engine
type(inference_engine_t) network, inference_engine
type(file_t) json_output_file, json_input_file
type(tensor_t) network_outputs
real(rkind), parameter :: false = 0._rkind, true = 1._rkind
type(tensor_t) inputs, outputs

print *, "Constructing an inference_engine_t neural-network object from scratch."
xor_network = single_hidden_layer_xor_network()
network = identity_network()

print *, "Converting an inference_engine_t object to a file_t object."
json_output_file = xor_network%to_json()
json_output_file = network%to_json()

print *, "Writing an inference_engine_t object to the file '"//output_file_name%string()//"' in JSON format."
call json_output_file%write_lines(output_file_name)
Expand All @@ -69,15 +65,17 @@ subroutine write_read_query_infer(output_file_name)
inference_engine = inference_engine_t(json_input_file)

print *, "Querying the new inference_engine_t object for several properties:"
print *, "number of outputs:", inference_engine%num_outputs()
print *, "nodes per layer:", inference_engine%nodes_per_layer()
print *, "Number of outputs:", inference_engine%num_outputs()
print *, "Number of inputs:", inference_engine%num_inputs()
print *, "Nodes per layer:", inference_engine%nodes_per_layer()
activation_name = inference_engine%activation_function_name()
print *, "activation function: ", activation_name%string()
print *, "using skip connections: ", merge("true ", "false", inference_engine%skip())
print *, "Activation function: ", activation_name%string()
print *, "Performing inference:"
network_outputs = inference_engine%infer(tensor_t([real(rkind):: false,true]))
print *, "inference_engine%infer([0.,1.]) =", network_outputs%values()
print *, "Correct answer for the XOR neural network: ", 1.
inputs = tensor_t([2.,3.])
print *, "Inputs: ", inputs%values()
outputs = inference_engine%infer(inputs)
print *, "Actual outputs: ", outputs%values()
print *, "Correct outputs: ", inputs%values()
end subroutine write_read_query_infer

end program
9 changes: 7 additions & 2 deletions src/inference_engine/inference_engine_s.f90
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
use step_m, only : step_t
use swish_m, only : swish_t
use sigmoid_m, only : sigmoid_t
use relu_m, only : relu_t
use layer_m, only : layer_t
use neuron_m, only : neuron_t
use file_m, only : file_t
Expand Down Expand Up @@ -99,19 +100,23 @@ pure module subroutine difference_consistency(self)

pure subroutine set_activation_strategy(inference_engine)
type(inference_engine_t), intent(inout) :: inference_engine
character(len=:), allocatable :: function_name
! This code is called in both constructors and can't be refactored into a factory method
! pattern because the result would need to be allocatable and polymorphic, which would preclude
! the function being pure so it wouldn't be possible to call it from inside the pure constructor
! functions.
select case(inference_engine%metadata_(findloc(key, "activationFunction", dim=1))%string())
function_name = inference_engine%metadata_(findloc(key, "activationFunction", dim=1))%string()
select case(function_name)
case("swish")
inference_engine%activation_strategy_ = swish_t()
case("sigmoid")
inference_engine%activation_strategy_ = sigmoid_t()
case("step")
inference_engine%activation_strategy_ = step_t()
case("relu")
inference_engine%activation_strategy_ = relu_t()
case default
error stop "inference_engine_s(set_activation_strategy): unrecognized activation strategy"
error stop "inference_engine_s(set_activation_strategy): unrecognized activation strategy '"//function_name//"'"
end select
end subroutine

Expand Down
40 changes: 40 additions & 0 deletions src/inference_engine/relu_m.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
! Copyright (c), The Regents of the University of California
! Terms of use are as specified in LICENSE.txt

!! Define the rectified linear unit (ReLU) activation strategy as an
!! extension of the differentiable activation strategy abstraction.
!! The procedure implementations are deferred to the relu_s submodule.
module relu_m
  use differentiable_activation_strategy_m, only : differentiable_activation_strategy_t
  use kind_parameters_m, only : rkind
  use string_m, only : string_t
  implicit none

  private
  public :: relu_t

  !! ReLU activation strategy: stateless, so all type-bound procedures are nopass
  type, extends(differentiable_activation_strategy_t) :: relu_t
  contains
    procedure, nopass :: activation
    procedure, nopass :: activation_derivative
    procedure, nopass :: function_name
  end type

  interface

    !! Evaluate the ReLU function at x
    elemental module function activation(x) result(y)
      implicit none
      real(rkind), intent(in) :: x
      real(rkind) y
    end function

    !! Evaluate the derivative of the ReLU function at x
    elemental module function activation_derivative(x) result(y)
      implicit none
      real(rkind), intent(in) :: x
      real(rkind) y
    end function

    !! Return the name by which this activation function is identified
    !! in network metadata (used by set_activation_strategy)
    elemental module function function_name() result(string)
      implicit none
      type(string_t) string
    end function

  end interface

end module relu_m
23 changes: 23 additions & 0 deletions src/inference_engine/relu_s.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
! Copyright (c), The Regents of the University of California
! Terms of use are as specified in LICENSE.txt

!! Implement the ReLU activation function, its derivative, and its
!! metadata name, as deferred from the relu_m interface bodies.
submodule(relu_m) relu_s
  use kind_parameters_m, only : rkind
  implicit none

contains

  module procedure activation
    !! ReLU: identity on the positive half-line, zero elsewhere
    y = max(x, 0._rkind)
  end procedure

  module procedure activation_derivative
    !! Piecewise-constant slope of ReLU: one where x is positive, zero otherwise
    if (x > 0._rkind) then
      y = 1._rkind
    else
      y = 0._rkind
    end if
  end procedure

  module procedure function_name
    !! The canonical metadata string that selects this strategy
    string = string_t("relu")
  end procedure

end submodule relu_s
7 changes: 7 additions & 0 deletions src/inference_engine/trainable_engine_m.f90
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ module trainable_engine_m
procedure :: infer
procedure :: num_layers
procedure :: num_inputs
procedure :: num_outputs
procedure :: to_inference_engine
end type

Expand Down Expand Up @@ -81,6 +82,12 @@ elemental module function num_inputs(self) result(n_in)
integer n_in
end function

!! Return the number of output neurons, i.e. the node count of the final layer
elemental module function num_outputs(self) result(n_out)
implicit none
class(trainable_engine_t), intent(in) :: self
integer n_out
end function

elemental module function num_layers(self) result(n_layers)
implicit none
class(trainable_engine_t), intent(in) :: self
Expand Down
6 changes: 5 additions & 1 deletion src/inference_engine/trainable_engine_s.f90
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@
n_layers = size(self%n,1)
end procedure

module procedure num_outputs
! The output-layer width is the last element of the nodes-per-layer array;
! use ubound rather than size in case the array's lower bound is not 1
n_out = self%n(ubound(self%n,1))
end procedure

module procedure construct_from_inference_engine

associate(exchange => inference_engine%to_exchange())
Expand Down Expand Up @@ -82,7 +86,7 @@

module procedure train
integer l, batch, mini_batch_size, pair
real(rkind), parameter :: eta = 3.e0 ! Learning parameter
real(rkind), parameter :: eta = 1.5e0 ! Learning parameter
real(rkind), allocatable :: &
z(:,:), a(:,:), delta(:,:), dcdw(:,:,:), dcdb(:,:), vdw(:,:,:), sdw(:,:,:), vdb(:,:), sdb(:,:), vdwc(:,:,:), sdwc(:,:,:), &
vdbc(:,:), sdbc(:,:)
Expand Down
1 change: 1 addition & 0 deletions src/inference_engine_m.f90
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ module inference_engine_m
use kind_parameters_m, only : rkind
use mini_batch_m, only : mini_batch_t
use NetCDF_file_m, only : NetCDF_file_t
use relu_m, only : relu_t
use sigmoid_m, only : sigmoid_t
use step_m, only : step_t
use swish_m, only : swish_t
Expand Down
Loading
Loading