Feature: filter training data for maximal information entropy via flat multidimensional output-tensor histograms #169

Merged 1 commit on Jul 8, 2024
385 changes: 385 additions & 0 deletions cloud-microphysics/app/train-on-flat-distribution.f90

Large diffs are not rendered by default.
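The new 385-line training application is not rendered in the diff view. For orientation only, the self-contained sketch below (hypothetical names, scalar outputs rather than the multidimensional output tensors the new app bins) illustrates the flat-histogram idea named in the title: cap the number of samples kept per output bin so the retained training data approaches a uniform, maximum-entropy distribution.

program flat_histogram_filter_sketch
  !! Minimal sketch: keep at most max_per_bin samples per output bin so the
  !! retained data has an approximately flat histogram across the output range.
  implicit none
  integer, parameter :: n_samples = 1000, n_bins = 10, max_per_bin = 25
  real :: outputs(n_samples)
  integer :: counts(n_bins), bin, i
  logical :: keep(n_samples)

  call random_number(outputs)  ! stand-in for one output-tensor component per sample
  outputs = outputs**3         ! deliberately skew the distribution toward small values

  counts = 0
  keep = .false.
  do i = 1, n_samples
    bin = min(int(outputs(i)*n_bins) + 1, n_bins)  ! clamp the upper edge into the last bin
    if (counts(bin) < max_per_bin) then            ! accept samples until this bin is full
      counts(bin) = counts(bin) + 1
      keep(i) = .true.
    end if
  end do
  print *, "kept ", count(keep), " of ", n_samples, " samples; per-bin counts: ", counts
end program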

2 changes: 1 addition & 1 deletion cloud-microphysics/setup.sh
@@ -128,6 +128,6 @@ echo "Usage:"
echo ""
echo "./build/run-fpm.sh run train-cloud-microphysics -- \ "
echo " --base <string> --epochs <integer> \ "
echo " [--start <integer>] [--end <integer>] [--stride <integer>]"
echo " [--start <integer>] [--end <integer>] [--stride <integer>] [--bins <integer]"
echo ""
echo "where angular brackets denote user-provided values and square brackets denote optional arguments"
24 changes: 12 additions & 12 deletions src/inference_engine/inference_engine_s.F90
@@ -132,7 +132,7 @@ pure subroutine difference_consistency(self)

end subroutine

impure function activation_factory(activation_name) result(activation)
impure function activation_factory_method(activation_name) result(activation)
character(len=*), intent(in) :: activation_name
class(activation_strategy_t), allocatable :: activation

@@ -148,7 +148,7 @@ impure function activation_factory(activation_name) result(activation)
case("relu")
activation = relu_t()
case default
error stop "inference_engine_s(activation_factory): unrecognized activation strategy '"//activation_name//"'"
error stop "inference_engine_s(activation_factory_method): unrecognized activation strategy '"//activation_name//"'"
end select
end function

@@ -183,17 +183,16 @@ impure function activation_factory(activation_name) result(activation)
end if
end block

associate(strings => inference_engine%metadata_%strings())
inference_engine%activation_strategy_ = activation_factory(strings(4)%string())
end associate
if (allocated(inference_engine%activation_strategy_)) deallocate(inference_engine%activation_strategy_)
allocate(inference_engine%activation_strategy_, source = activation_factory_method(metadata(4)%string()))

call assert_consistency(inference_engine)

end procedure construct_from_padded_arrays

module procedure from_json

type(string_t), allocatable :: lines(:), metadata(:)
type(string_t), allocatable :: lines(:)
type(tensor_range_t) input_range, output_range
type(layer_t) hidden_layers, output_layer
real(rkind), allocatable :: hidden_weights(:,:,:)
Expand All @@ -208,6 +207,7 @@ impure function activation_factory(activation_name) result(activation)
proto_neuron = neuron_t(weights=[0.], bias=0.)
#endif


lines = file_%lines()
call assert(adjustl(lines(1)%string())=="{", "inference_engine_s(from_json): expected outermost object '{'")

@@ -282,18 +282,18 @@ impure function activation_factory(activation_name) result(activation)
#ifndef _CRAYFTN
associate(proto_meta => metadata_t(string_t(""),string_t(""),string_t(""),string_t(""),string_t("")))
#endif
associate(metadata_object => metadata_t(lines(l:l+size(proto_meta%to_json())-1)))
inference_engine = hidden_layers%inference_engine(metadata_object%strings(), output_layer, input_range, output_range)
associate(metadata => metadata_t(lines(l:l+size(proto_meta%to_json())-1)))
associate(metadata_strings => metadata%strings())
inference_engine = hidden_layers%inference_engine(metadata_strings, output_layer, input_range, output_range)
if (allocated(inference_engine%activation_strategy_)) deallocate(inference_engine%activation_strategy_)
allocate(inference_engine%activation_strategy_, source = activation_factory_method(metadata_strings(4)%string()))
end associate
end associate
#ifndef _CRAYFTN
end associate
#endif
end associate ! associate(num_lines ... )

associate(strings => inference_engine%metadata_%strings())
inference_engine%activation_strategy_ = activation_factory(strings(4)%string())
end associate

call assert_consistency(inference_engine)

contains
6 changes: 6 additions & 0 deletions src/inference_engine/input_output_pair_m.f90
@@ -8,6 +8,7 @@ module input_output_pair_m
private
public :: input_output_pair_t
public :: shuffle
public :: write_to_stdout

type input_output_pair_t
private
@@ -46,6 +47,11 @@ module subroutine shuffle(pairs)
type(input_output_pair_t), intent(inout) :: pairs(:)
end subroutine

module subroutine write_to_stdout(input_output_pairs)
implicit none
type(input_output_pair_t), intent(in) :: input_output_pairs(:)
end subroutine

end interface

end module input_output_pair_m
7 changes: 7 additions & 0 deletions src/inference_engine/input_output_pair_s.f90
@@ -36,4 +36,11 @@

end procedure

module procedure write_to_stdout
integer i
do i = 1, size(input_output_pairs)
print *, input_output_pairs(i)%inputs_%values(), " | ", input_output_pairs(i)%expected_outputs_%values()
end do
end procedure

end submodule input_output_pair_s
21 changes: 19 additions & 2 deletions src/inference_engine/tensor_range_m.f90
@@ -3,30 +3,39 @@
module tensor_range_m
use tensor_m, only : tensor_t
use julienne_m, only : string_t
use kind_parameters_m, only : rkind
implicit none

private
public :: tensor_range_t
public :: phase_space_bin_t

type phase_space_bin_t
integer, allocatable :: loc(:)
end type

type tensor_range_t
private
character(len=:), allocatable :: layer_
real, allocatable, dimension(:) :: minima_, maxima_
real, allocatable, dimension(:) :: minima_, maxima_, bin_widths_
contains
procedure map_to_training_range
procedure map_from_training_range
procedure to_json
procedure bin
procedure in_range
generic :: operator(==) => equals
procedure, private :: equals
end type


interface tensor_range_t

pure module function from_components(layer, minima, maxima) result(tensor_range)
pure module function from_components(layer, minima, maxima, num_bins) result(tensor_range)
implicit none
character(len=*), intent(in) :: layer
real, dimension(:), intent(in) :: minima, maxima
integer, intent(in), optional :: num_bins
type(tensor_range_t) tensor_range
end function

@@ -66,6 +75,14 @@ elemental module function equals(lhs, rhs) result(lhs_equals_rhs)
logical lhs_equals_rhs
end function

pure module function bin(self, tensor, num_bins) result(phase_space_bin)
implicit none
class(tensor_range_t), intent(in) :: self
type(tensor_t), intent(in) :: tensor
integer, intent(in) :: num_bins
type(phase_space_bin_t) phase_space_bin
end function

elemental module function in_range(self, tensor) result(is_in_range)
implicit none
class(tensor_range_t), intent(in) :: self
14 changes: 14 additions & 0 deletions src/inference_engine/tensor_range_s.f90
@@ -13,6 +13,11 @@
tensor_range%layer_ = layer
tensor_range%minima_ = minima
tensor_range%maxima_ = maxima
if (present(num_bins)) then
tensor_range%bin_widths_ = (maxima - minima)/real(num_bins,rkind)
else
tensor_range%bin_widths_ = maxima - minima
end if
end procedure

module procedure from_json
@@ -31,6 +36,8 @@
end if
end do

tensor_range%bin_widths_ = tensor_range%maxima_ - tensor_range%minima_

call assert(tensor_range_key_found, "tensor_range_s(from_json): 'tensor_range' key found")
end procedure

@@ -91,6 +98,13 @@
end associate
end procedure

module procedure bin
real(rkind), parameter :: half = 0.5_rkind
associate(tensor_values => min(tensor%values(), self%maxima_ - half*self%bin_widths_))
phase_space_bin%loc = (tensor_values - self%minima_)/self%bin_widths_ + 1
end associate
end procedure

module procedure in_range
is_in_range = all(tensor%values() >= self%minima_) .and. all(tensor%values() <= self%maxima_)
end procedure
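
A hypothetical usage sketch of the new bin type-bound procedure follows; it assumes the tensor_t constructor accepts an array of component values of kind rkind and that the imported names below are publicly accessible from the modules shown.

program bin_demo
  use inference_engine_m, only : tensor_range_t, tensor_t, phase_space_bin_t
  use kind_parameters_m, only : rkind
  implicit none
  type(tensor_range_t) output_range
  type(phase_space_bin_t) location

  ! Two output components spanning [0,1] and [0,10], each divided into 4 bins:
  output_range = tensor_range_t("outputs", minima=[0., 0.], maxima=[1., 10.], num_bins=4)

  ! A value at the upper boundary is clamped into the top bin, so expect loc = [3, 4]:
  location = output_range%bin(tensor_t([real(rkind) :: 0.6, 10.]), num_bins=4)
  print *, location%loc
end program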
9 changes: 9 additions & 0 deletions src/inference_engine/trainable_engine_m.F90
@@ -11,6 +11,7 @@ module trainable_engine_m
use tensor_range_m, only : tensor_range_t
use mini_batch_m, only : mini_batch_t
use training_configuration_m, only : training_configuration_t
use input_output_pair_m, only : input_output_pair_t
implicit none

private
@@ -37,6 +38,7 @@ module trainable_engine_m
procedure :: map_from_input_training_range
procedure :: map_to_output_training_range
procedure :: map_from_output_training_range
procedure :: map_to_training_ranges
end type

integer, parameter :: input_layer = 0
@@ -154,6 +156,13 @@ elemental module function map_from_output_training_range(self, tensor) result(un
type(tensor_t) unnormalized_tensor
end function

elemental module function map_to_training_ranges(self, input_output_pair) result(normalized_input_output_pair)
implicit none
class(trainable_engine_t), intent(in) :: self
type(input_output_pair_t), intent(in) :: input_output_pair
type(input_output_pair_t) normalized_input_output_pair
end function

end interface

end module trainable_engine_m
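
Because map_to_training_ranges is declared elemental, a caller can normalize an entire array of training pairs in one statement, e.g. (hypothetical variable names):

normalized_pairs = trainable_engine%map_to_training_ranges(raw_pairs)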
20 changes: 15 additions & 5 deletions src/inference_engine/trainable_engine_s.F90
@@ -292,11 +292,7 @@
end procedure

module procedure to_inference_engine
! assignment-stmt disallows the procedure from being pure because it might
! deallocate polymorphic allocatable subcomponent `activation_strategy_`
! TODO: consider how this affects design
inference_engine = inference_engine_t( &
self%metadata_%strings(), self%w, self%b, self%n, self%input_range_, self%output_range_)
inference_engine = inference_engine_t(self%metadata_%strings(), self%w, self%b, self%n, self%input_range_, self%output_range_)
end procedure

module procedure perturbed_identity_network
@@ -337,6 +333,20 @@ pure function e(j,n) result(unit_vector)

end procedure

module procedure map_to_training_ranges
associate( &
inputs => input_output_pair%inputs(), &
expected_outputs => input_output_pair%expected_outputs() &
)
associate( &
normalized_inputs => self%input_range_%map_to_training_range(inputs), &
normalized_outputs => self%output_range_%map_to_training_range(expected_outputs) &
)
normalized_input_output_pair = input_output_pair_t(normalized_inputs, normalized_outputs)
end associate
end associate
end procedure

module procedure map_to_input_training_range
normalized_tensor = self%input_range_%map_to_training_range(tensor)
end procedure
4 changes: 2 additions & 2 deletions src/inference_engine_m.f90
@@ -5,7 +5,7 @@ module inference_engine_m
use activation_strategy_m, only : activation_strategy_t
use differentiable_activation_strategy_m, only : differentiable_activation_strategy_t
use hyperparameters_m, only : hyperparameters_t
use input_output_pair_m, only : input_output_pair_t, shuffle
use input_output_pair_m, only : input_output_pair_t, shuffle, write_to_stdout
use inference_engine_m_, only : inference_engine_t, difference_t, infer
use kind_parameters_m, only : rkind
use metadata_m, only : metadata_t
@@ -17,7 +17,7 @@ module inference_engine_m
use step_m, only : step_t
use swish_m, only : swish_t
use tensor_m, only : tensor_t
use tensor_range_m, only : tensor_range_t
use tensor_range_m, only : tensor_range_t, phase_space_bin_t
use trainable_engine_m, only : trainable_engine_t
use training_configuration_m, only : training_configuration_t
use ubounds_m, only : ubounds_t