From 5eb8493f4303feadede75cf7f91c0e2dd0d53bf2 Mon Sep 17 00:00:00 2001 From: CompatHelper Julia Date: Sun, 4 Feb 2024 00:09:54 +0000 Subject: [PATCH 01/16] CompatHelper: bump compat for DynamicExpressions to 0.15, (keep existing compat) --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 3d8334072..77ecdd6e1 100644 --- a/Project.toml +++ b/Project.toml @@ -37,7 +37,7 @@ SymbolicRegressionSymbolicUtilsExt = "SymbolicUtils" [compat] Aqua = "0.7" Compat = "^4.2" -DynamicExpressions = "0.13" +DynamicExpressions = "0.13, 0.15" DynamicQuantities = "0.10" JSON3 = "1" LineSearches = "7" From 2c3b983e0548538bb6d459df83e8419458bfe6b1 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sun, 4 Feb 2024 02:01:53 +0000 Subject: [PATCH 02/16] Only 0.15 of DynamicExpressions --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 77ecdd6e1..15a56989b 100644 --- a/Project.toml +++ b/Project.toml @@ -37,7 +37,7 @@ SymbolicRegressionSymbolicUtilsExt = "SymbolicUtils" [compat] Aqua = "0.7" Compat = "^4.2" -DynamicExpressions = "0.13, 0.15" +DynamicExpressions = "0.15" DynamicQuantities = "0.10" JSON3 = "1" LineSearches = "7" From f6fa6cd6acf1fc617f7e90e7e928218c9418941c Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sun, 4 Feb 2024 02:25:36 +0000 Subject: [PATCH 03/16] Fix speed issue: optimization evaluating full-batch --- src/ConstantOptimization.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ConstantOptimization.jl b/src/ConstantOptimization.jl index 4c99f84e1..da39c549e 100644 --- a/src/ConstantOptimization.jl +++ b/src/ConstantOptimization.jl @@ -93,8 +93,8 @@ function _optimize_constants( if Optim.converged(result) _set_constants!(result.minimizer, constant_nodes) - member.score, member.loss = score_func(dataset, member, options) - num_evals += 1 + member.score, member.loss = score_func_batched(dataset, member, options; idx=idx) + num_evals += eval_fraction member.birth = get_birth_order(; deterministic=options.deterministic) else _set_constants!(x0, constant_nodes) From 00289acd9b71f15a7ef5f71cfaa943916dfa1ffd Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sun, 4 Feb 2024 03:30:03 +0000 Subject: [PATCH 04/16] Full upgrade to new DynamicExpressions with refactored optimization --- Project.toml | 6 +- src/Configure.jl | 20 ------- src/ConstantOptimization.jl | 91 +++++++++++------------------- src/InterfaceDynamicExpressions.jl | 21 +++---- src/MLJInterface.jl | 1 + src/Options.jl | 34 ++++++++--- src/OptionsStruct.jl | 16 +++++- test/runtests.jl | 3 + test/test_derivatives.jl | 8 +-- test/test_mixed.jl | 7 ++- 10 files changed, 99 insertions(+), 108 deletions(-) diff --git a/Project.toml b/Project.toml index 15a56989b..713c79a45 100644 --- a/Project.toml +++ b/Project.toml @@ -36,11 +36,13 @@ SymbolicRegressionSymbolicUtilsExt = "SymbolicUtils" [compat] Aqua = "0.7" +Bumper = "0.6" Compat = "^4.2" DynamicExpressions = "0.15" DynamicQuantities = "0.10" JSON3 = "1" LineSearches = "7" +LoopVectorization = "0.12" LossFunctions = "0.10, 0.11" MLJModelInterface = "1.5, 1.6, 1.7, 1.8" MacroTools = "0.4, 0.5" @@ -58,9 +60,11 @@ julia = "1.6" [extras] Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" +Bumper = "8ce10254-0962-460f-a3d8-1f77fea1446e" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890" 
MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d" MLJTestInterface = "72560011-54dd-4dc2-94f3-c5de45b75ecd" SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" @@ -70,4 +74,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [targets] -test = ["Test", "SafeTestsets", "Aqua", "ForwardDiff", "LinearAlgebra", "JSON3", "MLJBase", "MLJTestInterface", "Suppressor", "SymbolicUtils", "Zygote"] +test = ["Test", "SafeTestsets", "Aqua", "Bumper", "ForwardDiff", "LinearAlgebra", "LoopVectorization", "JSON3", "MLJBase", "MLJTestInterface", "Suppressor", "SymbolicUtils", "Zygote"] diff --git a/src/Configure.jl b/src/Configure.jl index ffcceca8d..9c4837e15 100644 --- a/src/Configure.jl +++ b/src/Configure.jl @@ -114,14 +114,6 @@ end function move_functions_to_workers( procs, options::Options, dataset::Dataset{T}, verbosity ) where {T} - enable_autodiff = - :diff_binops in fieldnames(typeof(options.operators)) && - :diff_unaops in fieldnames(typeof(options.operators)) && - ( - options.operators.diff_binops !== nothing || - options.operators.diff_unaops !== nothing - ) - # All the types of functions we need to move to workers: function_sets = ( :unaops, @@ -140,18 +132,6 @@ function move_functions_to_workers( elseif function_set == :binops ops = options.operators.binops example_inputs = (zero(T), zero(T)) - elseif function_set == :diff_unaops - if !enable_autodiff - continue - end - ops = options.operators.diff_unaops - example_inputs = (zero(T),) - elseif function_set == :diff_binops - if !enable_autodiff - continue - end - ops = options.operators.diff_binops - example_inputs = (zero(T), zero(T)) elseif function_set == :elementwise_loss if typeof(options.elementwise_loss) <: SupervisedLoss continue diff --git a/src/ConstantOptimization.jl b/src/ConstantOptimization.jl index da39c549e..60f303540 100644 --- a/src/ConstantOptimization.jl +++ b/src/ConstantOptimization.jl @@ -2,30 +2,12 @@ module ConstantOptimizationModule using LineSearches: LineSearches using Optim: Optim -using DynamicExpressions: Node, count_constants +using DynamicExpressions: Node, count_constants, get_constant_refs using ..CoreModule: Options, Dataset, DATA_TYPE, LOSS_TYPE using ..UtilsModule: get_birth_order -using ..LossFunctionsModule: score_func, eval_loss, batch_sample +using ..LossFunctionsModule: eval_loss, loss_to_score, batch_sample using ..PopMemberModule: PopMember -# Proxy function for optimization -@inline function opt_func( - x, dataset::Dataset{T,L}, tree, constant_nodes, options, idx -) where {T<:DATA_TYPE,L<:LOSS_TYPE} - _set_constants!(x, constant_nodes) - # TODO(mcranmer): This should use score_func batching. - loss = eval_loss(tree, dataset, options; regularization=false, idx=idx) - return loss::L -end - -function _set_constants!(x::AbstractArray{T}, constant_nodes) where {T} - for (xi, node) in zip(x, constant_nodes) - node.val::T = xi - end - return nothing -end - -# Use Nelder-Mead to optimize the constants in an equation function optimize_constants( dataset::Dataset{T,L}, member::PopMember{T,L}, options::Options )::Tuple{PopMember{T,L},Float64} where {T<:DATA_TYPE,L<:LOSS_TYPE} @@ -42,62 +24,57 @@ function dispatch_optimize_constants( ) where {T<:DATA_TYPE,L<:LOSS_TYPE} nconst = count_constants(member.tree) nconst == 0 && return (member, 0.0) - if T <: Complex - # TODO: Make this more general. Also, do we even need Newton here at all?? 
- algorithm = Optim.BFGS(; linesearch=LineSearches.BackTracking())#order=3)) - return _optimize_constants( - dataset, member, options, algorithm, options.optimizer_options, idx - ) - elseif nconst == 1 + if nconst == 1 && !(T <: Complex) algorithm = Optim.Newton(; linesearch=LineSearches.BackTracking()) return _optimize_constants( dataset, member, options, algorithm, options.optimizer_options, idx ) - else - if options.optimizer_algorithm == "NelderMead" - algorithm = Optim.NelderMead(; linesearch=LineSearches.BackTracking()) - return _optimize_constants( - dataset, member, options, algorithm, options.optimizer_options, idx - ) - elseif options.optimizer_algorithm == "BFGS" - algorithm = Optim.BFGS(; linesearch=LineSearches.BackTracking())#order=3)) - return _optimize_constants( - dataset, member, options, algorithm, options.optimizer_options, idx - ) - else - error("Optimization function not implemented.") - end end + return _optimize_constants( + dataset, + member, + options, + options.optimizer_algorithm, + options.optimizer_options, + idx, + ) end function _optimize_constants( dataset, member::PopMember{T,L}, options, algorithm, optimizer_options, idx )::Tuple{PopMember{T,L},Float64} where {T,L} tree = member.tree - constant_nodes = filter(t -> t.degree == 0 && t.constant, tree) - x0 = [n.val::T for n in constant_nodes] - f(x) = opt_func(x, dataset, tree, constant_nodes, options, idx) - result = Optim.optimize(f, x0, algorithm, optimizer_options) - num_evals = 0.0 - num_evals += result.f_calls + eval_fraction = options.batching ? (options.batch_size / dataset.n) : 1.0 + f(t) = eval_loss(t, dataset, options; regularization=false, idx=idx)::L + baseline = f(tree) + result = Optim.optimize(f, tree, algorithm, optimizer_options) + num_evals = result.f_calls * eval_fraction # Try other initial conditions: - for i in 1:(options.optimizer_nrestarts) - new_start = x0 .* (T(1) .+ T(1//2) * randn(T, size(x0, 1))) - tmpresult = Optim.optimize(f, new_start, algorithm, optimizer_options) - num_evals += tmpresult.f_calls + for _ in 1:(options.optimizer_nrestarts) + tmptree = copy(tree) + foreach(tmptree) do node + if node.degree == 0 && node.constant + node.val::T = (node.val::T) * (T(1) + T(1//2) * randn(T)) + end + end + tmpresult = Optim.optimize( + f, tmptree, algorithm, optimizer_options; make_copy=false + ) + num_evals += tmpresult.f_calls * eval_fraction if tmpresult.minimum < result.minimum result = tmpresult end end - if Optim.converged(result) - _set_constants!(result.minimizer, constant_nodes) - member.score, member.loss = score_func_batched(dataset, member, options; idx=idx) - num_evals += eval_fraction + if result.minimum < baseline + member.tree = result.minimizer + member.loss = result.minimum + member.score = loss_to_score( + member.loss, dataset.use_baseline, dataset.baseline_loss, member, options + ) member.birth = get_birth_order(; deterministic=options.deterministic) - else - _set_constants!(x0, constant_nodes) + num_evals += eval_fraction end return member, num_evals diff --git a/src/InterfaceDynamicExpressions.jl b/src/InterfaceDynamicExpressions.jl index a76677485..54ba5a970 100644 --- a/src/InterfaceDynamicExpressions.jl +++ b/src/InterfaceDynamicExpressions.jl @@ -3,6 +3,7 @@ module InterfaceDynamicExpressionsModule using Printf: @sprintf using DynamicExpressions: DynamicExpressions using DynamicExpressions: OperatorEnum, GenericOperatorEnum, Node +using DynamicExpressions.EquationModule: needs_brackets using DynamicQuantities: dimension, ustrip using ..CoreModule: Options 
using ..CoreModule.OptionsModule: inverse_binopmap, inverse_unaopmap @@ -52,7 +53,9 @@ which speed up evaluation significantly. to the equation. """ function eval_tree_array(tree::Node, X::AbstractArray, options::Options; kws...) - return eval_tree_array(tree, X, options.operators; turbo=options.turbo, kws...) + return eval_tree_array( + tree, X, options.operators; turbo=options.turbo, bumper=options.bumper, kws... + ) end """ @@ -68,7 +71,6 @@ respect to `x1`. - `tree::Node`: The expression tree to evaluate. - `X::AbstractArray`: The data matrix, with each column being a data point. - `options::Options`: The options containing the operators used to create the `tree`. - `enable_autodiff` must be set to `true` when creating the options. This is needed to create the derivative operations. - `direction::Int`: The index of the variable to take the derivative with respect to. @@ -96,7 +98,6 @@ to every constant in the expression. - `tree::Node`: The expression tree to evaluate. - `X::AbstractArray`: The data matrix, with each column being a data point. - `options::Options`: The options containing the operators used to create the `tree`. - `enable_autodiff` must be set to `true` when creating the options. This is needed to create the derivative operations. - `variable::Bool`: Whether to take derivatives with respect to features (i.e., `X` - with `variable=true`), or with respect to every constant in the expression (`variable=false`). @@ -156,8 +157,7 @@ Convert an equation to a string. tree, options.operators; f_variable=(feature, vname) -> string_variable(feature, vname, X_sym_units), - f_constant=(val, bracketed) -> - string_constant(val, bracketed, vprecision, "[⋅]"), + f_constant=(val, bracketed) -> string_constant(val, vprecision, "[⋅]"), variable_names=display_variable_names, kws..., ) @@ -166,7 +166,7 @@ Convert an equation to a string. tree, options.operators; f_variable=string_variable, - f_constant=(val, bracketed) -> string_constant(val, bracketed, vprecision, ""), + f_constant=(val,) -> string_constant(val, vprecision, ""), variable_names=display_variable_names, kws..., ) @@ -191,11 +191,8 @@ function string_variable(feature, variable_names, variable_units=nothing) end return base end -function string_constant( - val, bracketed, ::Val{precision}, unit_placeholder -) where {precision} - does_not_need_brackets = typeof(val) <: Real - if does_not_need_brackets +function string_constant(val, ::Val{precision}, unit_placeholder) where {precision} + if typeof(val) <: Real return sprint_precision(val, Val(precision)) * unit_placeholder else return "(" * string(val) * ")" * unit_placeholder @@ -284,7 +281,7 @@ function define_alias_operators(operators) end function (tree::Node)(X, options::Options; kws...) - return tree(X, options.operators; turbo=options.turbo, kws...) + return tree(X, options.operators; turbo=options.turbo, bumper=options.bumper, kws...) end function DynamicExpressions.EvaluationHelpersModule._grad_evaluator( tree::Node, X, options::Options; kws... 
diff --git a/src/MLJInterface.jl b/src/MLJInterface.jl index 22835c66a..d3486a428 100644 --- a/src/MLJInterface.jl +++ b/src/MLJInterface.jl @@ -1,6 +1,7 @@ module MLJInterfaceModule using Optim: Optim +using LineSearches: LineSearches using MLJModelInterface: MLJModelInterface as MMI using DynamicExpressions: eval_tree_array, string_tree, Node using DynamicQuantities: diff --git a/src/Options.jl b/src/Options.jl index 80c1ef23a..3ec180670 100644 --- a/src/Options.jl +++ b/src/Options.jl @@ -6,6 +6,8 @@ using StatsBase: StatsBase using DynamicExpressions: OperatorEnum, Node, string_tree using Distributed: nworkers using LossFunctions: L2DistLoss, SupervisedLoss +using Optim: Optim +using LineSearches: LineSearches #TODO - eventually move some of these # into the SR call itself, rather than # passing huge options at once. @@ -165,6 +167,7 @@ const deprecated_options_mapping = NamedTuple([ :earlyStopCondition => :early_stop_condition, :stateReturn => :deprecated_return_state, :return_state => :deprecated_return_state, + :enable_autodiff => :deprecated_enable_autodiff, :ns => :tournament_selection_n, :loss => :elementwise_loss, ]) @@ -280,6 +283,7 @@ const OPTION_DESCRIPTIONS = """- `binary_operators`: Vector of binary operators - `turbo`: Whether to use `LoopVectorization.@turbo` to evaluate expressions. This can be significantly faster, but is only compatible with certain operators. *Experimental!* +- `bumper`: Whether to use Bumper.jl for faster evaluation. *Experimental!* - `migration`: Whether to migrate equations between processes. - `hof_migration`: Whether to migrate equations from the hall of fame to processes. @@ -294,7 +298,7 @@ const OPTION_DESCRIPTIONS = """- `binary_operators`: Vector of binary operators - `optimizer_nrestarts`: How many different random starting positions to consider for optimization of constants. - `optimizer_algorithm`: Select algorithm to use for optimizing constants. Default - is "BFGS", but "NelderMead" is also supported. + is `Optim.BFGS(linesearch=LineSearches.BackTracking())`. - `optimizer_options`: General options for the constant optimization. For details we refer to the documentation on `Optim.Options` from the `Optim.jl` package. Options can be provided here as `NamedTuple`, e.g. `(iterations=16,)`, as a @@ -330,8 +334,6 @@ const OPTION_DESCRIPTIONS = """- `binary_operators`: Vector of binary operators - `max_evals`: Int (or Nothing) - the maximum number of evaluations of expressions to perform. - `skip_mutation_failures`: Whether to simply skip over mutations that fail or are rejected, rather than to replace the mutated expression with the original expression and proceed normally. -- `enable_autodiff`: Whether to enable automatic differentiation functionality. This is turned off by default. - If turned on, this will be turned off if one of the operators does not have well-defined gradients. - `nested_constraints`: Specifies how many times a combination of operators can be nested. For example, `[sin => [cos => 0], cos => [cos => 2]]` specifies that `cos` may never appear within a `sin`, but `sin` can be nested with itself an unlimited number of times. 
The second term specifies that `cos` @@ -376,6 +378,7 @@ function Options end maxsize::Integer=20, maxdepth::Union{Nothing,Integer}=nothing, turbo::Bool=false, + bumper::Bool=false, migration::Bool=true, hof_migration::Bool=true, should_simplify::Union{Nothing,Bool}=nothing, @@ -405,7 +408,9 @@ function Options end una_constraints=nothing, progress::Union{Bool,Nothing}=nothing, terminal_width::Union{Nothing,Integer}=nothing, - optimizer_algorithm::AbstractString="BFGS", + optimizer_algorithm::Union{AbstractString,Optim.AbstractOptimizer}=Optim.BFGS(; + linesearch=LineSearches.BackTracking() + ), optimizer_nrestarts::Integer=2, optimizer_probability::Real=0.14, optimizer_iterations::Union{Nothing,Integer}=nothing, @@ -416,7 +421,6 @@ function Options end timeout_in_seconds::Union{Nothing,Real}=nothing, max_evals::Union{Nothing,Integer}=nothing, skip_mutation_failures::Bool=true, - enable_autodiff::Bool=false, nested_constraints=nothing, deterministic::Bool=false, # Not search options; just construction options: @@ -459,6 +463,7 @@ function Options end k == :earlyStopCondition && (early_stop_condition = kws[k]; true) && continue k == :return_state && (deprecated_return_state = kws[k]; true) && continue k == :stateReturn && (deprecated_return_state = kws[k]; true) && continue + k == :enable_autodiff && continue k == :ns && (tournament_selection_n = kws[k]; true) && continue k == :loss && (elementwise_loss = kws[k]; true) && continue if k == :mutationWeights @@ -491,6 +496,17 @@ function Options end Base.depwarn("`npopulations` is deprecated. Use `populations` instead.", :Options) populations = npopulations end + if optimizer_algorithm isa AbstractString + Base.depwarn( + "The `optimizer_algorithm` argument should be an `AbstractOptimizer`, not a string.", + :Options, + ) + optimizer_algorithm = if optimizer_algorithm == "NelderMead" + Optim.NelderMead(; linesearch=LineSearches.BackTracking()) + else + Optim.BFGS(; linesearch=LineSearches.BackTracking()) + end + end if elementwise_loss === nothing elementwise_loss = L2DistLoss() @@ -669,7 +685,6 @@ function Options end OperatorEnum(; binary_operators=binary_operators, unary_operators=unary_operators, - enable_autodiff=false, # Not needed; we just want the constructors define_helper_functions=true, empty_old_operators=true, ) @@ -681,7 +696,6 @@ function Options end operators = OperatorEnum(; binary_operators=binary_operators, unary_operators=unary_operators, - enable_autodiff=enable_autodiff, define_helper_functions=define_helper_functions, empty_old_operators=false, ) @@ -735,6 +749,9 @@ function Options end typeof(operators), use_recorder, typeof(optimizer_options), + typeof(optimizer_algorithm), + turbo, + bumper, typeof(tournament_selection_weights), }( operators, @@ -749,7 +766,8 @@ function Options end alpha, maxsize, maxdepth, - turbo, + Val(turbo), + Val(bumper), migration, hof_migration, should_simplify, diff --git a/src/OptionsStruct.jl b/src/OptionsStruct.jl index dad2c46ca..b80df6753 100644 --- a/src/OptionsStruct.jl +++ b/src/OptionsStruct.jl @@ -135,7 +135,16 @@ function ComplexityMapping(; ) end -struct Options{CT,OP<:AbstractOperatorEnum,use_recorder,OPT<:Optim.Options,W} +struct Options{ + CT, + OP<:AbstractOperatorEnum, + use_recorder, + OPT<:Optim.Options, + OPT_A<:Optim.AbstractOptimizer, + _turbo, + _bumper, + W, +} operators::OP bin_constraints::Vector{Tuple{Int,Int}} una_constraints::Vector{Int} @@ -148,7 +157,8 @@ struct Options{CT,OP<:AbstractOperatorEnum,use_recorder,OPT<:Optim.Options,W} alpha::Float32 maxsize::Int 
maxdepth::Int - turbo::Bool + turbo::Val{_turbo} + bumper::Val{_bumper} migration::Bool hof_migration::Bool should_simplify::Bool @@ -181,7 +191,7 @@ struct Options{CT,OP<:AbstractOperatorEnum,use_recorder,OPT<:Optim.Options,W} loss_function::Union{Nothing,Function} progress::Union{Bool,Nothing} terminal_width::Union{Int,Nothing} - optimizer_algorithm::String + optimizer_algorithm::OPT_A optimizer_probability::Float32 optimizer_nrestarts::Int optimizer_options::OPT diff --git a/test/runtests.jl b/test/runtests.jl index e06afebba..624e0d2c2 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -6,6 +6,9 @@ ENV["SYMBOLIC_REGRESSION_TEST"] = "true" @safetestset "Aqua tests" begin include("test_aqua.jl") end +# Trigger extensions: +using LoopVectorization, Bumper, Zygote + @safetestset "Unit tests" begin include("unittest.jl") end diff --git a/test/test_derivatives.jl b/test/test_derivatives.jl index 5c70e4a97..e8948237f 100644 --- a/test/test_derivatives.jl +++ b/test/test_derivatives.jl @@ -44,9 +44,7 @@ for type in [Float16, Float32, Float64] X = rand(rng, type, nfeatures, N) * 5 options = Options(; - binary_operators=(+, *, -, /, pow_abs2), - unary_operators=(custom_cos, exp, sin), - enable_autodiff=true, + binary_operators=(+, *, -, /, pow_abs2), unary_operators=(custom_cos, exp, sin) ) @extend_operators options @@ -128,9 +126,7 @@ println("Testing NodeIndex.") using SymbolicRegression: get_constants, NodeIndex, index_constants options = Options(; - binary_operators=(+, *, -, /, pow_abs2), - unary_operators=(custom_cos, exp, sin), - enable_autodiff=true, + binary_operators=(+, *, -, /, pow_abs2), unary_operators=(custom_cos, exp, sin) ) @extend_operators options tree = equation3(nx1, nx2, nx3) diff --git a/test/test_mixed.jl b/test/test_mixed.jl index db68c4e0a..bff1693d9 100644 --- a/test/test_mixed.jl +++ b/test/test_mixed.jl @@ -21,6 +21,7 @@ for i in 0:5 use_frequency = false use_frequency_in_tournament = false turbo = false + bumper = false T = Float32 print("Testing with batching=$(batching) and weighted=$(weighted), ") if i == 0 @@ -37,11 +38,14 @@ for i in 0:5 use_frequency = true parallelism = "multiprocessing" elseif i == 3 - println("with multi-threading and crossover and use_frequency_in_tournament") + println( + "with multi-threading and crossover and use_frequency_in_tournament and bumper=true", + ) parallelism = :multithreading numprocs = nothing crossover_probability = 0.02f0 use_frequency_in_tournament = true + bumper = true elseif i == 4 println( "with crossover and skip mutation failures and both frequencies options, and Float16 type", @@ -80,6 +84,7 @@ for i in 0:5 use_frequency=use_frequency, use_frequency_in_tournament=use_frequency_in_tournament, turbo=turbo, + bumper=bumper, ) end From f4755237207c557d00b51d2941db99a192a82996 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sun, 4 Feb 2024 03:32:33 +0000 Subject: [PATCH 05/16] Clean up test parameters --- test/test_params.jl | 4 +++- test/test_print.jl | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/test/test_params.jl b/test/test_params.jl index 3dda38b1c..18a5b4aff 100644 --- a/test/test_params.jl +++ b/test/test_params.jl @@ -1,4 +1,6 @@ using SymbolicRegression: L2DistLoss, MutationWeights +using Optim: Optim +using LineSearches: LineSearches using Test: Test maximum_residual = 1e-2 @@ -48,7 +50,7 @@ default_params = ( una_constraints=nothing, progress=false, terminal_width=nothing, - optimizer_algorithm="NelderMead", + optimizer_algorithm=Optim.NelderMead(; 
linesearch=LineSearches.BackTracking()), optimizer_nrestarts=3, optimizer_probability=0.1f0, optimizer_iterations=100, diff --git a/test/test_print.jl b/test/test_print.jl index 920ffe0da..99052851a 100644 --- a/test/test_print.jl +++ b/test/test_print.jl @@ -14,7 +14,7 @@ f = (x1, x2, x3) -> (sin(cos(sin(cos(x1) * x3) * 3.0) * -0.5) + 2.0) * 5.0 tree = f(Node("x1"), Node("x2"), Node("x3")) s = repr(tree) -true_s = "((sin(cos(sin(cos(x1) * x3) * 3.0) * -0.5) + 2.0) * 5.0)" +true_s = "(sin(cos(sin(cos(x1) * x3) * 3.0) * -0.5) + 2.0) * 5.0" @test s == true_s @@ -28,7 +28,7 @@ equation_search( ) s = repr(tree) -true_s = "((sin(cos(sin(cos(v1) * v3) * 3.0) * -0.5) + 2.0) * 5.0)" +true_s = "(sin(cos(sin(cos(v1) * v3) * 3.0) * -0.5) + 2.0) * 5.0" @test s == true_s for unaop in [safe_log, safe_log2, safe_log10, safe_log1p, safe_sqrt, safe_acosh] @@ -44,7 +44,7 @@ for binop in [safe_pow, ^] default_params..., binary_operators=(+, *, /, -, binop), unary_operators=(cos,) ) minitree = Node(5, Node("x1"), Node("x2")) - @test string_tree(minitree, opts) == "(x1 ^ x2)" + @test string_tree(minitree, opts) == "x1 ^ x2" end @testset "Test splitting of strings" begin From 1579d479ff88dfe9ee9a52615495d20ad96fe508 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sun, 4 Feb 2024 03:41:06 +0000 Subject: [PATCH 06/16] Fix other issues with DynamicExpressions.jl upgrade --- src/InterfaceDynamicExpressions.jl | 2 +- test/test_derivatives.jl | 26 -------------------------- 2 files changed, 1 insertion(+), 27 deletions(-) diff --git a/src/InterfaceDynamicExpressions.jl b/src/InterfaceDynamicExpressions.jl index 54ba5a970..e9e455229 100644 --- a/src/InterfaceDynamicExpressions.jl +++ b/src/InterfaceDynamicExpressions.jl @@ -157,7 +157,7 @@ Convert an equation to a string. tree, options.operators; f_variable=(feature, vname) -> string_variable(feature, vname, X_sym_units), - f_constant=(val, bracketed) -> string_constant(val, vprecision, "[⋅]"), + f_constant=(val,) -> string_constant(val, vprecision, "[⋅]"), variable_names=display_variable_names, kws..., ) diff --git a/test/test_derivatives.jl b/test/test_derivatives.jl index e8948237f..73af7d62f 100644 --- a/test/test_derivatives.jl +++ b/test/test_derivatives.jl @@ -120,29 +120,3 @@ for type in [Float16, Float32, Float64] @test array_test(predicted_grad, true_grad) println("Done.") end - -println("Testing NodeIndex.") - -using SymbolicRegression: get_constants, NodeIndex, index_constants - -options = Options(; - binary_operators=(+, *, -, /, pow_abs2), unary_operators=(custom_cos, exp, sin) -) -@extend_operators options -tree = equation3(nx1, nx2, nx3) - -"""Check whether the ordering of constant_list is the same as the ordering of node_index.""" -function check_tree(tree::Node, node_index::NodeIndex, constant_list::AbstractVector) - if tree.degree == 0 - (!tree.constant) || tree.val == constant_list[node_index.constant_index] - elseif tree.degree == 1 - check_tree(tree.l, node_index.l, constant_list) - else - check_tree(tree.l, node_index.l, constant_list) && - check_tree(tree.r, node_index.r, constant_list) - end -end - -@test check_tree(tree, index_constants(tree), get_constants(tree)) - -println("Done.") From 45b0ca8a66101fb22c71fcd75602669ecff86e55 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sun, 4 Feb 2024 12:13:29 +0000 Subject: [PATCH 07/16] Add way to load extensions on remote workers --- src/Configure.jl | 53 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 18 deletions(-) diff --git a/src/Configure.jl 
b/src/Configure.jl index 9c4837e15..4bdd46ffb 100644 --- a/src/Configure.jl +++ b/src/Configure.jl @@ -118,8 +118,6 @@ function move_functions_to_workers( function_sets = ( :unaops, :binops, - :diff_unaops, - :diff_binops, :elementwise_loss, :early_stop_condition, :loss_function, @@ -209,26 +207,45 @@ function activate_env_on_workers(procs, project_path::String, options::Options, end function import_module_on_workers(procs, filename::String, options::Options, verbosity) - included_local = !("SymbolicRegression" in [k.name for (k, v) in Base.loaded_modules]) - if included_local - verbosity > 0 && @info "Importing local module ($filename) on workers..." - @everywhere procs begin - # Parse functions on every worker node - Base.MainInclude.eval( - quote - include($$filename) - using .SymbolicRegression - end, - ) + loaded_modules_head_worker = [k.name for (k, _) in Base.loaded_modules] + + included_as_local = "SymbolicRegression" ∉ loaded_modules_head_worker + expr = if included_as_local + quote + include($filename) + using .SymbolicRegression end - verbosity > 0 && @info "Finished!" else - verbosity > 0 && @info "Importing installed module on workers..." - @everywhere procs begin - Base.MainInclude.eval(:(using SymbolicRegression)) + quote + using SymbolicRegression end - verbosity > 0 && @info "Finished!" end + + # Need to import any extension code, if loaded on head node + relevant_extensions = [ + :SymbolicUtils, + :Bumper, + :LoopVectorization, + :Zygote, + :CUDA, + :Enzyme + ] + filter!(m -> String(m) ∈ loaded_modules_head_worker, relevant_extensions) + # HACK TODO – this workaround is very fragile. Likely need to submit a bug report + # to JuliaLang. + + for ext in relevant_extensions + push!(expr.args, quote using $ext: $ext end) + end + + verbosity > 0 && + if isempty(relevant_extensions) + @info "Importing SymbolicRegression on workers." + else + @info "Importing SymbolicRegression on workers as well as extensions $(join(relevant_extensions, ',' * ' '))." + end + @everywhere procs Base.MainInclude.eval($expr) + verbosity > 0 && @info "Finished!" end function test_module_on_workers(procs, options::Options, verbosity) From 5773f7384febde148d7dcb0f8a0661a2ed61dfab Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 4 Feb 2024 12:15:51 +0000 Subject: [PATCH 08/16] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/Configure.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Configure.jl b/src/Configure.jl index 4bdd46ffb..7528b63fe 100644 --- a/src/Configure.jl +++ b/src/Configure.jl @@ -238,7 +238,7 @@ function import_module_on_workers(procs, filename::String, options::Options, ver push!(expr.args, quote using $ext: $ext end) end - verbosity > 0 && + verbosity > 0 && if isempty(relevant_extensions) @info "Importing SymbolicRegression on workers." 
else From 73442d0a95162664e8b8cc390d80780292bedb1c Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 7 Feb 2024 12:40:24 +0000 Subject: [PATCH 09/16] Fix string function import --- src/InterfaceDynamicExpressions.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/InterfaceDynamicExpressions.jl b/src/InterfaceDynamicExpressions.jl index e9e455229..39a03570a 100644 --- a/src/InterfaceDynamicExpressions.jl +++ b/src/InterfaceDynamicExpressions.jl @@ -3,7 +3,7 @@ module InterfaceDynamicExpressionsModule using Printf: @sprintf using DynamicExpressions: DynamicExpressions using DynamicExpressions: OperatorEnum, GenericOperatorEnum, Node -using DynamicExpressions.EquationModule: needs_brackets +using DynamicExpressions.StringsModule: needs_brackets using DynamicQuantities: dimension, ustrip using ..CoreModule: Options using ..CoreModule.OptionsModule: inverse_binopmap, inverse_unaopmap From 09f75a6968c49a3469ee7a79797df59389884194 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Mon, 19 Feb 2024 20:15:12 +0000 Subject: [PATCH 10/16] Formatting --- src/Configure.jl | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/Configure.jl b/src/Configure.jl index 48c73141e..9b1f96674 100644 --- a/src/Configure.jl +++ b/src/Configure.jl @@ -219,29 +219,28 @@ function import_module_on_workers(procs, filename::String, options::Options, ver # Need to import any extension code, if loaded on head node relevant_extensions = [ - :SymbolicUtils, - :Bumper, - :LoopVectorization, - :Zygote, - :CUDA, - :Enzyme + :SymbolicUtils, :Bumper, :LoopVectorization, :Zygote, :CUDA, :Enzyme ] filter!(m -> String(m) ∈ loaded_modules_head_worker, relevant_extensions) # HACK TODO – this workaround is very fragile. Likely need to submit a bug report # to JuliaLang. for ext in relevant_extensions - push!(expr.args, quote using $ext: $ext end) + push!( + expr.args, + quote + using $ext: $ext + end, + ) end - verbosity > 0 && - if isempty(relevant_extensions) - @info "Importing SymbolicRegression on workers." - else - @info "Importing SymbolicRegression on workers as well as extensions $(join(relevant_extensions, ',' * ' '))." - end + verbosity > 0 && if isempty(relevant_extensions) + @info "Importing SymbolicRegression on workers." + else + @info "Importing SymbolicRegression on workers as well as extensions $(join(relevant_extensions, ',' * ' '))." + end @everywhere procs Base.MainInclude.eval($expr) - verbosity > 0 && @info "Finished!" + return verbosity > 0 && @info "Finished!" 
end function test_module_on_workers( From a44d01778f0f4b937289d3cc2ea010c8afa4ec19 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 19 Feb 2024 20:16:05 +0000 Subject: [PATCH 11/16] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 6e5b7ebe0..6783d4940 100644 --- a/Project.toml +++ b/Project.toml @@ -74,4 +74,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [targets] -test = ["Test", "SafeTestsets", "Aqua", "Bumper", "ForwardDiff", "LinearAlgebra", "LoopVectorization", "JSON3", "MLJBase", "MLJTestInterface", "Suppressor", "SymbolicUtils", "Zygote"] \ No newline at end of file +test = ["Test", "SafeTestsets", "Aqua", "Bumper", "ForwardDiff", "LinearAlgebra", "LoopVectorization", "JSON3", "MLJBase", "MLJTestInterface", "Suppressor", "SymbolicUtils", "Zygote"] From 5b3b720d83829a7921aa35c2667f38e3a22aa74a Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 21 Feb 2024 19:19:34 +0000 Subject: [PATCH 12/16] Bump DE version --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 6783d4940..ae5464b01 100644 --- a/Project.toml +++ b/Project.toml @@ -38,7 +38,7 @@ SymbolicRegressionSymbolicUtilsExt = "SymbolicUtils" Aqua = "0.7" Bumper = "0.6" Compat = "^4.2" -DynamicExpressions = "0.15" +DynamicExpressions = "0.16" DynamicQuantities = "0.10 - 0.12" JSON3 = "1" LineSearches = "7" From 4211454421eb0039958a5c30fe857e9db2c9d16c Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 21 Feb 2024 19:28:01 +0000 Subject: [PATCH 13/16] Clean up use of `.val::T` --- src/ConstantOptimization.jl | 2 +- src/DimensionalAnalysis.jl | 2 +- src/MutationFunctions.jl | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ConstantOptimization.jl b/src/ConstantOptimization.jl index 16f4131a4..7ffecbcb0 100644 --- a/src/ConstantOptimization.jl +++ b/src/ConstantOptimization.jl @@ -54,7 +54,7 @@ function _optimize_constants( tmptree = copy(tree) foreach(tmptree) do node if node.degree == 0 && node.constant - node.val::T = (node.val::T) * (T(1) + T(1//2) * randn(T)) + node.val = (node.val) * (T(1) + T(1//2) * randn(T)) end end tmpresult = Optim.optimize( diff --git a/src/DimensionalAnalysis.jl b/src/DimensionalAnalysis.jl index 638a5cee6..460448cc3 100644 --- a/src/DimensionalAnalysis.jl +++ b/src/DimensionalAnalysis.jl @@ -119,7 +119,7 @@ end @inline function deg0_eval( x::AbstractVector{T}, x_units::Vector{Q}, t::AbstractExpressionNode{T} ) where {T,R,Q<:AbstractQuantity{T,R}} - t.constant && return WildcardQuantity{Q}(Quantity(t.val::T, R), true, false) + t.constant && return WildcardQuantity{Q}(Quantity(t.val, R), true, false) return WildcardQuantity{Q}( (@inbounds x[t.feature]) * (@inbounds x_units[t.feature]), false, false ) diff --git a/src/MutationFunctions.jl b/src/MutationFunctions.jl index 8f909c7c4..aec05d57e 100644 --- a/src/MutationFunctions.jl +++ b/src/MutationFunctions.jl @@ -68,13 +68,13 @@ function mutate_constant( makeConstBigger = rand(Bool) if makeConstBigger - node.val::T *= factor + node.val *= factor else - node.val::T /= factor + node.val /= factor end if rand() > options.probability_negate_constant - node.val::T *= -1 + node.val *= -1 end return tree From 07d85e057973cbe954351c54461f6ec64e229142 Mon Sep 17 00:00:00 2001 From: 
MilesCranmer Date: Wed, 21 Feb 2024 20:57:48 +0000 Subject: [PATCH 14/16] Fix test for new operator enum --- test/test_simplification.jl | 9 --------- 1 file changed, 9 deletions(-) diff --git a/test/test_simplification.jl b/test/test_simplification.jl index 0c7ae9fa3..90193bda3 100644 --- a/test/test_simplification.jl +++ b/test/test_simplification.jl @@ -49,15 +49,6 @@ x1, x2, x3 = Node("x1"), Node("x2"), Node("x3") pow_abs2(x, y) = abs(x)^y custom_cos(x) = cos(x)^2 -# Define for Node (usually these are done internally to Options) -pow_abs2(l::Node, r::Node)::Node = - (l.constant && r.constant) ? Node(pow_abs2(l.val, r.val)::Real) : Node(5, l, r) -pow_abs2(l::Node, r::Real)::Node = - l.constant ? Node(pow_abs2(l.val, r)::Real) : Node(5, l, r) -pow_abs2(l::Real, r::Node)::Node = - r.constant ? Node(pow_abs2(l, r.val)::Real) : Node(5, l, r) -custom_cos(x::Node)::Node = x.constant ? Node(custom_cos(x.val)::Real) : Node(1, x) - options = Options(; binary_operators=(+, *, -, /, pow_abs2), unary_operators=(custom_cos, exp, sin) ) From 9a97ac93c49b6e736b4c9972de4c787534110bc6 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sat, 9 Mar 2024 19:31:58 +0000 Subject: [PATCH 15/16] Fix simplification test --- test/test_simplification.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_simplification.jl b/test/test_simplification.jl index 90193bda3..0518c77c8 100644 --- a/test/test_simplification.jl +++ b/test/test_simplification.jl @@ -52,6 +52,7 @@ custom_cos(x) = cos(x)^2 options = Options(; binary_operators=(+, *, -, /, pow_abs2), unary_operators=(custom_cos, exp, sin) ) +@extend_operators options tree = ( ((x2 + x2) * ((-0.5982493 / pow_abs2(x1, x2)) / -0.54734415)) + ( sin( From 645141bea3de01c933ef6474992be7b94d730650 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sat, 9 Mar 2024 20:10:14 +0000 Subject: [PATCH 16/16] Fix bug in member loss not being updated --- src/ConstantOptimization.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ConstantOptimization.jl b/src/ConstantOptimization.jl index 7ffecbcb0..f813e05fa 100644 --- a/src/ConstantOptimization.jl +++ b/src/ConstantOptimization.jl @@ -69,7 +69,7 @@ function _optimize_constants( if result.minimum < baseline member.tree = result.minimizer - member.loss = result.minimum + member.loss = eval_loss(member.tree, dataset, options; regularization=true, idx=idx) member.score = loss_to_score( member.loss, dataset.use_baseline, dataset.baseline_loss, member, options )
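
Usage sketch for the optimizer change in patches 04/05: `optimizer_algorithm` now accepts an `Optim.AbstractOptimizer` directly instead of a string, with the default shown in src/Options.jl. The snippet below is a minimal illustration, not part of the patches; the operator sets and restart count are made-up example values.

    using SymbolicRegression: Options
    using Optim: Optim
    using LineSearches: LineSearches

    # Pass an Optim optimizer object directly (the new default is
    # Optim.BFGS(; linesearch=LineSearches.BackTracking())).
    options = Options(;
        binary_operators=(+, *, -, /),
        unary_operators=(cos,),
        optimizer_algorithm=Optim.BFGS(; linesearch=LineSearches.BackTracking()),
        optimizer_nrestarts=2,
    )

The old string form ("BFGS" or "NelderMead") still constructs the corresponding Optim optimizer but now emits a deprecation warning. Similarly, the new `turbo` and `bumper` options rely on LoopVectorization.jl and Bumper.jl being loaded by the caller, which is why the updated test/runtests.jl adds `using LoopVectorization, Bumper, Zygote` to trigger those extensions before the tests run.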