Skip to content

Commit

Permalink
Merge pull request #169 from BerkeleyLab/flat-distribution-training
Browse files Browse the repository at this point in the history
Feature:  filter training data for maximal information entropy via flat multidimensional output-tensor histograms
  • Loading branch information
rouson committed Jul 8, 2024
2 parents 92ec89d + 3625bf6 commit 9a9fc0e
Show file tree
Hide file tree
Showing 10 changed files with 470 additions and 22 deletions.
385 changes: 385 additions & 0 deletions cloud-microphysics/app/train-on-flat-distribution.f90

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion cloud-microphysics/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,6 @@ echo "Usage:"
echo ""
echo "./build/run-fpm.sh run train-cloud-microphysics -- \ "
echo " --base <string> --epochs <integer> \ "
echo " [--start <integer>] [--end <integer>] [--stride <integer>]"
echo " [--start <integer>] [--end <integer>] [--stride <integer>] [--bins <integer]"
echo ""
echo "where angular brackets denote user-provided values and square brackets denote optional arguments"
24 changes: 12 additions & 12 deletions src/inference_engine/inference_engine_s.F90
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ pure subroutine difference_consistency(self)

end subroutine

impure function activation_factory(activation_name) result(activation)
impure function activation_factory_method(activation_name) result(activation)
character(len=*), intent(in) :: activation_name
class(activation_strategy_t), allocatable :: activation

Expand All @@ -148,7 +148,7 @@ impure function activation_factory(activation_name) result(activation)
case("relu")
activation = relu_t()
case default
error stop "inference_engine_s(activation_factory): unrecognized activation strategy '"//activation_name//"'"
error stop "inference_engine_s(activation_factory_method): unrecognized activation strategy '"//activation_name//"'"
end select
end function

Expand Down Expand Up @@ -183,17 +183,16 @@ impure function activation_factory(activation_name) result(activation)
end if
end block

associate(strings => inference_engine%metadata_%strings())
inference_engine%activation_strategy_ = activation_factory(strings(4)%string())
end associate
if (allocated(inference_engine%activation_strategy_)) deallocate(inference_engine%activation_strategy_)
allocate(inference_engine%activation_strategy_, source = activation_factory_method(metadata(4)%string()))

call assert_consistency(inference_engine)

end procedure construct_from_padded_arrays

module procedure from_json

type(string_t), allocatable :: lines(:), metadata(:)
type(string_t), allocatable :: lines(:)
type(tensor_range_t) input_range, output_range
type(layer_t) hidden_layers, output_layer
real(rkind), allocatable :: hidden_weights(:,:,:)
Expand All @@ -208,6 +207,7 @@ impure function activation_factory(activation_name) result(activation)
proto_neuron = neuron_t(weights=[0.], bias=0.)
#endif


lines = file_%lines()
call assert(adjustl(lines(1)%string())=="{", "inference_engine_s(from_json): expected outermost object '{'")

Expand Down Expand Up @@ -282,18 +282,18 @@ impure function activation_factory(activation_name) result(activation)
#ifndef _CRAYFTN
associate(proto_meta => metadata_t(string_t(""),string_t(""),string_t(""),string_t(""),string_t("")))
#endif
associate(metadata_object => metadata_t(lines(l:l+size(proto_meta%to_json())-1)))
inference_engine = hidden_layers%inference_engine(metadata_object%strings(), output_layer, input_range, output_range)
associate(metadata => metadata_t(lines(l:l+size(proto_meta%to_json())-1)))
associate(metadata_strings => metadata%strings())
inference_engine = hidden_layers%inference_engine(metadata_strings, output_layer, input_range, output_range)
if (allocated(inference_engine%activation_strategy_)) deallocate(inference_engine%activation_strategy_)
allocate(inference_engine%activation_strategy_, source = activation_factory_method(metadata_strings(4)%string()))
end associate
end associate
#ifndef _CRAYFTN
end associate
#endif
end associate ! associate(num_lines ... )

associate(strings => inference_engine%metadata_%strings())
inference_engine%activation_strategy_ = activation_factory(strings(4)%string())
end associate

call assert_consistency(inference_engine)

contains
Expand Down
6 changes: 6 additions & 0 deletions src/inference_engine/input_output_pair_m.f90
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ module input_output_pair_m
private
public :: input_output_pair_t
public :: shuffle
public :: write_to_stdout

type input_output_pair_t
private
Expand Down Expand Up @@ -46,6 +47,11 @@ module subroutine shuffle(pairs)
type(input_output_pair_t), intent(inout) :: pairs(:)
end subroutine

module subroutine write_to_stdout(input_output_pairs)
implicit none
type(input_output_pair_t), intent(in) :: input_output_pairs(:)
end subroutine

end interface

end module input_output_pair_m
7 changes: 7 additions & 0 deletions src/inference_engine/input_output_pair_s.f90
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,11 @@

end procedure

module procedure write_to_stdout
integer i
do i = 1, size(input_output_pairs)
print *, input_output_pairs(i)%inputs_%values(), " | ", input_output_pairs(i)%expected_outputs_%values()
end do
end procedure

end submodule input_output_pair_s
21 changes: 19 additions & 2 deletions src/inference_engine/tensor_range_m.f90
Original file line number Diff line number Diff line change
Expand Up @@ -3,30 +3,39 @@
module tensor_range_m
use tensor_m, only : tensor_t
use julienne_m, only : string_t
use kind_parameters_m, only : rkind
implicit none

private
public :: tensor_range_t
public :: phase_space_bin_t

type phase_space_bin_t
integer, allocatable :: loc(:)
end type

type tensor_range_t
private
character(len=:), allocatable :: layer_
real, allocatable, dimension(:) :: minima_, maxima_
real, allocatable, dimension(:) :: minima_, maxima_, bin_widths_
contains
procedure map_to_training_range
procedure map_from_training_range
procedure to_json
procedure bin
procedure in_range
generic :: operator(==) => equals
procedure, private :: equals
end type


interface tensor_range_t

pure module function from_components(layer, minima, maxima) result(tensor_range)
pure module function from_components(layer, minima, maxima, num_bins) result(tensor_range)
implicit none
character(len=*), intent(in) :: layer
real, dimension(:), intent(in) :: minima, maxima
integer, intent(in), optional :: num_bins
type(tensor_range_t) tensor_range
end function

Expand Down Expand Up @@ -66,6 +75,14 @@ elemental module function equals(lhs, rhs) result(lhs_equals_rhs)
logical lhs_equals_rhs
end function

pure module function bin(self, tensor, num_bins) result(phase_space_bin)
implicit none
class(tensor_range_t), intent(in) :: self
type(tensor_t), intent(in) :: tensor
integer, intent(in) :: num_bins
type(phase_space_bin_t) phase_space_bin
end function

elemental module function in_range(self, tensor) result(is_in_range)
implicit none
class(tensor_range_t), intent(in) :: self
Expand Down
14 changes: 14 additions & 0 deletions src/inference_engine/tensor_range_s.f90
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@
tensor_range%layer_ = layer
tensor_range%minima_ = minima
tensor_range%maxima_ = maxima
if (present(num_bins)) then
tensor_range%bin_widths_ = (maxima - minima)/real(num_bins,rkind)
else
tensor_range%bin_widths_ = maxima - minima
end if
end procedure

module procedure from_json
Expand All @@ -31,6 +36,8 @@
end if
end do

tensor_range%bin_widths_ = tensor_range%maxima_ - tensor_range%minima_

call assert(tensor_range_key_found, "tensor_range_s(from_json): 'tensor_range' key found")
end procedure

Expand Down Expand Up @@ -91,6 +98,13 @@
end associate
end procedure

module procedure bin
real(rkind), parameter :: half = 0.5_rkind
associate(tensor_values => min(tensor%values(), self%maxima_ - half*self%bin_widths_))
phase_space_bin%loc = (tensor_values - self%minima_)/self%bin_widths_ + 1
end associate
end procedure

module procedure in_range
is_in_range = all(tensor%values() >= self%minima_) .and. all(tensor%values() <= self%maxima_)
end procedure
Expand Down
9 changes: 9 additions & 0 deletions src/inference_engine/trainable_engine_m.F90
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ module trainable_engine_m
use tensor_range_m, only : tensor_range_t
use mini_batch_m, only : mini_batch_t
use training_configuration_m, only : training_configuration_t
use input_output_pair_m, only : input_output_pair_t
implicit none

private
Expand All @@ -37,6 +38,7 @@ module trainable_engine_m
procedure :: map_from_input_training_range
procedure :: map_to_output_training_range
procedure :: map_from_output_training_range
procedure :: map_to_training_ranges
end type

integer, parameter :: input_layer = 0
Expand Down Expand Up @@ -154,6 +156,13 @@ elemental module function map_from_output_training_range(self, tensor) result(un
type(tensor_t) unnormalized_tensor
end function

elemental module function map_to_training_ranges(self, input_output_pair) result(normalized_input_output_pair)
implicit none
class(trainable_engine_t), intent(in) :: self
type(input_output_pair_t), intent(in) :: input_output_pair
type(input_output_pair_t) normalized_input_output_pair
end function

end interface

end module trainable_engine_m
20 changes: 15 additions & 5 deletions src/inference_engine/trainable_engine_s.F90
Original file line number Diff line number Diff line change
Expand Up @@ -292,11 +292,7 @@
end procedure

module procedure to_inference_engine
! assignment-stmt disallows the procedure from being pure because it might
! deallocate polymorphic allocatable subcomponent `activation_strategy_`
! TODO: consider how this affects design
inference_engine = inference_engine_t( &
self%metadata_%strings(), self%w, self%b, self%n, self%input_range_, self%output_range_)
inference_engine = inference_engine_t(self%metadata_%strings(), self%w, self%b, self%n, self%input_range_, self%output_range_)
end procedure

module procedure perturbed_identity_network
Expand Down Expand Up @@ -337,6 +333,20 @@ pure function e(j,n) result(unit_vector)

end procedure

module procedure map_to_training_ranges
associate( &
inputs => input_output_pair%inputs(), &
expected_outputs => input_output_pair%expected_outputs() &
)
associate( &
normalized_inputs => self%input_range_%map_to_training_range(inputs), &
normalized_outputs => self%output_range_%map_to_training_range(expected_outputs) &
)
normalized_input_output_pair = input_output_pair_t(normalized_inputs, normalized_outputs)
end associate
end associate
end procedure

module procedure map_to_input_training_range
normalized_tensor = self%input_range_%map_to_training_range(tensor)
end procedure
Expand Down
4 changes: 2 additions & 2 deletions src/inference_engine_m.f90
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ module inference_engine_m
use activation_strategy_m, only : activation_strategy_t
use differentiable_activation_strategy_m, only : differentiable_activation_strategy_t
use hyperparameters_m, only : hyperparameters_t
use input_output_pair_m, only : input_output_pair_t, shuffle
use input_output_pair_m, only : input_output_pair_t, shuffle, write_to_stdout
use inference_engine_m_, only : inference_engine_t, difference_t, infer
use kind_parameters_m, only : rkind
use metadata_m, only : metadata_t
Expand All @@ -17,7 +17,7 @@ module inference_engine_m
use step_m, only : step_t
use swish_m, only : swish_t
use tensor_m, only : tensor_t
use tensor_range_m, only : tensor_range_t
use tensor_range_m, only : tensor_range_t, phase_space_bin_t
use trainable_engine_m, only : trainable_engine_t
use training_configuration_m, only : training_configuration_t
use ubounds_m, only : ubounds_t
Expand Down

0 comments on commit 9a9fc0e

Please sign in to comment.