Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: "Thick" Pareto frontier #400

Open
wants to merge 16 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/Core.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ using .OptionsStructModule:
AbstractOptions,
Options,
ComplexityMapping,
AbstractParetoOptions,
ParetoSingleOptions,
ParetoTopKOptions,
specialized_options,
operator_specialization
using .OperatorsModule:
Expand Down
36 changes: 30 additions & 6 deletions src/ExpressionBuilder.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ using DynamicExpressions:
AbstractExpressionNode, AbstractExpression, constructorof, with_metadata
using StatsBase: StatsBase
using ..CoreModule: AbstractOptions, Dataset
using ..HallOfFameModule: HallOfFame
using ..HallOfFameModule: HallOfFame, ParetoSingle, ParetoTopK
using ..PopulationModule: Population
using ..PopMemberModule: PopMember

Expand Down Expand Up @@ -124,20 +124,32 @@ end
pop::Population, options::AbstractOptions, dataset::Dataset{T,L}
) where {T,L}
return Population(
map(Fix{2}(Fix{3}(embed_metadata, dataset), options), pop.members)
map(member -> embed_metadata(member, options, dataset), pop.members)
)
end
# Embed metadata into the single member held by a `ParetoSingle`,
# returning a freshly wrapped element.
function embed_metadata(
    el::ParetoSingle, options::AbstractOptions, dataset::Dataset{T,L}
) where {T,L}
    embedded = embed_metadata(el.member, options, dataset)
    return ParetoSingle(embedded)
end
# Embed metadata into every member of a `ParetoTopK` bucket, keeping the
# same capacity `k`.
function embed_metadata(
    el::ParetoTopK, options::AbstractOptions, dataset::Dataset{T,L}
) where {T,L}
    embedded = map(el.members) do member
        embed_metadata(member, options, dataset)
    end
    return ParetoTopK(embedded, el.k)
end
function embed_metadata(
hof::HallOfFame, options::AbstractOptions, dataset::Dataset{T,L}
) where {T,L}
return HallOfFame(
map(Fix{2}(Fix{3}(embed_metadata, dataset), options), hof.members), hof.exists
map(el -> embed_metadata(el, options, dataset), hof.elements), hof.exists
)
end
function embed_metadata(
vec::Vector{H}, options::AbstractOptions, dataset::Dataset{T,L}
sets::Vector{H}, options::AbstractOptions, dataset::Dataset{T,L}
) where {T,L,H<:Union{HallOfFame,Population,PopMember}}
return map(Fix{2}(Fix{3}(embed_metadata, dataset), options), vec)
return map(set -> embed_metadata(set, options, dataset), sets)
end
end

Expand Down Expand Up @@ -171,11 +183,23 @@ function strip_metadata(
) where {T,L}
return Population(map(member -> strip_metadata(member, options, dataset), pop.members))
end
# Strip metadata from the single member held by a `ParetoSingle`,
# returning a freshly wrapped element.
function strip_metadata(
    el::ParetoSingle, options::AbstractOptions, dataset::Dataset{T,L}
) where {T,L}
    stripped = strip_metadata(el.member, options, dataset)
    return ParetoSingle(stripped)
end
# Strip metadata from every member of a `ParetoTopK` bucket, keeping the
# same capacity `k`.
function strip_metadata(
    el::ParetoTopK, options::AbstractOptions, dataset::Dataset{T,L}
) where {T,L}
    stripped = map(el.members) do member
        strip_metadata(member, options, dataset)
    end
    return ParetoTopK(stripped, el.k)
end
function strip_metadata(
hof::HallOfFame, options::AbstractOptions, dataset::Dataset{T,L}
) where {T,L}
return HallOfFame(
map(member -> strip_metadata(member, options, dataset), hof.members), hof.exists
map(el -> strip_metadata(el, options, dataset), hof.elements), hof.exists
)
end

Expand Down
255 changes: 213 additions & 42 deletions src/HallOfFame.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,75 @@ module HallOfFameModule

using StyledStrings: @styled_str
using DynamicExpressions: AbstractExpression, string_tree
using Printf: @sprintf
using ..UtilsModule: split_string, AnnotatedIOBuffer, dump_buffer
using ..CoreModule: ParetoSingleOptions, ParetoTopKOptions
using ..CoreModule: AbstractOptions, Dataset, DATA_TYPE, LOSS_TYPE, relu, create_expression
using ..ComplexityModule: compute_complexity
using ..PopMemberModule: PopMember
using ..InterfaceDynamicExpressionsModule: format_dimensions, WILDCARD_UNIT_STRING
using Printf: @sprintf
using ..PopulationModule: Population

"""
AbstractParetoElement{P<:PopMember}

Abstract type for storing elements on the Pareto frontier.

# Subtypes
- `ParetoSingle`: Stores a single member at each complexity level
- `ParetoTopK`: Stores multiple members at each complexity level in a fixed-size bucket
"""
HallOfFame{T<:DATA_TYPE,L<:LOSS_TYPE}
abstract type AbstractParetoElement{P<:PopMember} end

# Return the `PopMember` type parameter `P` carried by a Pareto element type.
function pop_member_type(::Type{<:AbstractParetoElement{P}}) where {P}
    return P
end

# Pareto element that holds exactly one `PopMember`.
# The type parameters `T`, `L`, `N` are those of the wrapped `PopMember{T,L,N}`,
# and `P` is the concrete member type.
struct ParetoSingle{T,L,N,P<:PopMember{T,L,N}} <: AbstractParetoElement{P}
# The member stored in this element.
member::P
end
# Pareto element that holds a bucket of `PopMember`s together with a
# capacity `k`. The `push!` method for this type inserts members in ascending
# `score` order and drops the last one once the bucket exceeds `k` entries.
struct ParetoTopK{T,L,N,P<:PopMember{T,L,N}} <: AbstractParetoElement{P}
# Members stored in this bucket.
members::Vector{P}
# Maximum number of members the bucket retains.
k::Int
end

# Copy a `ParetoSingle` by copying its wrapped member.
function Base.copy(el::ParetoSingle)
    return ParetoSingle(copy(el.member))
end

# Copy a `ParetoTopK` member-by-member, reserving room for `k + 1` entries
# so a later `push!` can insert before trimming without reallocating.
function Base.copy(el::ParetoTopK)
    copied = map(copy, el.members)
    sizehint!(copied, el.k + 1)
    return ParetoTopK(copied, el.k)
end

# The first member of a `ParetoSingle` is its only member.
function Base.first(el::ParetoSingle)
    return el.member
end

# The first member of a `ParetoTopK` is the first entry of its bucket.
function Base.first(el::ParetoTopK)
    return first(el.members)
end

# Iterating a `ParetoSingle` yields its member once; `nothing` is used as the
# "already consumed" state.
function Base.iterate(el::ParetoSingle)
    return (el.member, nothing)
end
function Base.iterate(::ParetoSingle, ::Nothing)
    return nothing
end

# Iterating a `ParetoTopK` delegates to its `members` vector.
function Base.iterate(el::ParetoTopK)
    return iterate(el.members)
end
function Base.iterate(el::ParetoTopK, state)
    return iterate(el.members, state)
end

# Render as `ParetoSingle(<member>)`, where the member is displayed with its
# own `text/plain` show method.
function Base.show(io::IO, mime::MIME"text/plain", el::ParetoSingle)
    wrapped = el.member
    print(io, "ParetoSingle(")
    show(io, mime, wrapped)
    print(io, ")")
    return nothing
end

# Emit the deprecation warning used when a `ParetoSingle` is accessed as if it
# were the wrapped member. `funcsym` names the calling function for `depwarn`.
function _depwarn_pareto_single(funcsym::Symbol)
    msg = "Hall of fame `.members` is now `.elements` which is a vector of `AbstractParetoElement` objects. "
    Base.depwarn(msg, funcsym)
    return nothing
end

# Property access on a `ParetoSingle`: `.member` returns the wrapped member;
# any other name is forwarded to the wrapped member after a deprecation warning.
@inline function Base.getproperty(s::ParetoSingle, name::Symbol)
    if name == :member
        return getfield(s, :member)
    end
    _depwarn_pareto_single(:getproperty)
    wrapped = getfield(s, :member)
    return getproperty(wrapped, name)
end
# Property assignment on a `ParetoSingle`: `.member` is set on the element
# itself; any other name is forwarded to the wrapped member after a
# deprecation warning.
@inline function Base.setproperty!(s::ParetoSingle, name::Symbol, value)
    if name == :member
        return setfield!(s, :member, value)
    end
    _depwarn_pareto_single(:setproperty!)
    wrapped = getfield(s, :member)
    return setproperty!(wrapped, name, value)
end

"""
HallOfFame{T<:DATA_TYPE,L<:LOSS_TYPE,N<:AbstractExpression{T}}

List of the best members seen all time in `.members`, with `.members[c]` being
the best member seen at complexity c. Including only the members which actually
Expand All @@ -22,23 +82,39 @@ have been set, you can run `.members[exists]`.
These are ordered by complexity, with `.members[1]` the member with complexity 1.
- `exists::Array{Bool,1}`: Whether the member at the given complexity has been set.
"""
struct HallOfFame{T<:DATA_TYPE,L<:LOSS_TYPE,N<:AbstractExpression{T}}
members::Array{PopMember{T,L,N},1}
exists::Array{Bool,1} #Whether it has been set
struct HallOfFame{
T<:DATA_TYPE,
L<:LOSS_TYPE,
N<:AbstractExpression{T},
H<:AbstractParetoElement{<:PopMember{T,L,N}},
}
elements::Vector{H}
exists::Vector{Bool}
end
# The member type of a `HallOfFame` is the member type of its element type `H`.
function pop_member_type(::Type{<:HallOfFame{T,L,N,H}}) where {T,L,N,H}
    return pop_member_type(H)
end
# Property access on a `HallOfFame`. The deprecated name `.members` is
# redirected to the `elements` field with a deprecation warning; every other
# name is a plain field access.
@inline function Base.getproperty(hof::HallOfFame, name::Symbol)
    name == :members || return getfield(hof, name)
    Base.depwarn(
        "HallOfFame.members is deprecated. Use HallOfFame.elements instead.",
        :getproperty,
    )
    return getfield(hof, :elements)
end
function Base.show(io::IO, mime::MIME"text/plain", hof::HallOfFame{T,L,N}) where {T,L,N}
println(io, "HallOfFame{...}:")
for i in eachindex(hof.members, hof.exists)
s_member, s_exists = if hof.exists[i]
sprint((io, m) -> show(io, mime, m), hof.members[i]), "true"
for i in eachindex(hof.elements, hof.exists)
s_element, s_exists = if hof.exists[i]
sprint((io, m) -> show(io, mime, m), hof.elements[i]), "true"
else
"undef", "false"
end
println(io, " "^4 * ".exists[$i] = $s_exists")
print(io, " "^4 * ".members[$i] =")
splitted = split(strip(s_member), '\n')
print(io, " "^4 * ".elements[$i] =")
splitted = split(strip(s_element), '\n')
if length(splitted) == 1
println(io, " " * s_member)
println(io, " " * s_element)
else
println(io)
foreach(line -> println(io, " "^8 * line), splitted)
Expand All @@ -61,58 +137,153 @@ Arguments:
- `dataset`: Dataset containing the input data.
"""
function HallOfFame(
options::AbstractOptions, dataset::Dataset{T,L}
options::AbstractOptions, dataset::Dataset{T,L};
) where {T<:DATA_TYPE,L<:LOSS_TYPE}
base_tree = create_expression(zero(T), options, dataset)
N = typeof(base_tree)
member = PopMember(
base_tree, L(0), L(Inf), options; parent=-1, deterministic=options.deterministic
)

return HallOfFame{T,L,typeof(base_tree)}(
return HallOfFame(
[
PopMember(
copy(base_tree),
L(0),
L(Inf),
options;
parent=-1,
deterministic=options.deterministic,
) for i in 1:(options.maxsize)
init_pareto_element(options.pareto_element_options, member) for
i in 1:(options.maxsize)
],
[false for i in 1:(options.maxsize)],
)
end
# Copy a `HallOfFame`: each element is copied individually and the `exists`
# flags are copied into a new vector.
function Base.copy(hof::HallOfFame)
    copied_elements = map(copy, hof.elements)
    copied_exists = copy(hof.exists)
    return HallOfFame(copied_elements, copied_exists)
end

function Base.copy(hof::HallOfFame)
return HallOfFame(
[copy(member) for member in hof.members], [exists for exists in hof.exists]
)
# Build a `ParetoSingle` around a copy of `member`. The first argument only
# selects the element kind (either the options object or an existing element).
function init_pareto_element(::Union{ParetoSingleOptions,ParetoSingle}, member::PopMember)
    fresh = copy(member)
    return ParetoSingle(fresh)
end
# Build a `ParetoTopK` bucket seeded with a copy of `member`. The capacity `k`
# is read from `opt`, which may be the options object or an existing bucket.
function init_pareto_element(opt::Union{ParetoTopKOptions,ParetoTopK}, member::PopMember)
    bucket = typeof(member)[]
    # Reserve k + 1 slots: `push!` inserts first and trims afterwards.
    sizehint!(bucket, opt.k + 1)
    push!(bucket, copy(member))
    return ParetoTopK(bucket, opt.k)
end

# Offer `member` to the hall of fame at complexity `size`.
# Sizes outside `1:length(hof.elements)` are ignored. The first member seen at
# a given size initialises that slot; later members are pushed onto the
# existing element, keyed by their score.
function Base.push!(hof::HallOfFame, (size, member)::Pair{<:Integer,<:PopMember})
    (0 < size <= length(hof.elements)) || return hof
    slot = hof.elements[size]
    if hof.exists[size]
        hof.elements[size] = push!(slot, member.score => member)
    else
        hof.elements[size] = init_pareto_element(slot, member)
        hof.exists[size] = true
    end
    return hof
end

# Offer `member` (with the given `score`) to a `ParetoSingle`.
# Returns a new element wrapping a copy of `member` when its score is strictly
# lower than the current one; otherwise returns `el` unchanged.
function Base.push!(el::ParetoSingle, (score, member)::Pair{<:LOSS_TYPE,<:PopMember})
    if el.member.score > score
        return ParetoSingle(copy(member))
    end
    return el
end
# Offer `member` (with the given `score`) to a `ParetoTopK` bucket.
#
# The bucket is kept sorted by ascending score (lower is better) and holds at
# most `el.k` members. A copy of `member` is inserted before the first stored
# member with a strictly worse score, so ties keep the earlier entry first.
# A candidate that is not better than the current worst member is rejected
# only when the bucket is already full; a bucket with spare capacity accepts
# it at the end. Returns `el`, mutated in place.
function Base.push!(el::ParetoTopK, (score, member)::Pair{<:LOSS_TYPE,<:PopMember})
    bucket = el.members
    if isempty(bucket)
        push!(bucket, copy(member))
        return el
    end

    # Only reject outright when there is no spare capacity.
    if length(bucket) >= el.k && bucket[end].score <= score
        return el
    end

    # Find the first member with a worse score; the new member takes that
    # position and pushes the rest back. No such member means "append".
    i = findfirst(m -> m.score > score, bucket)
    if i === nothing
        push!(bucket, copy(member))
    else
        insert!(bucket, i, copy(member))
    end

    # Drop the worst entry if the insertion overflowed the bucket.
    if length(bucket) > el.k
        pop!(bucket)
    end

    return el
end

# Offer every member of `pop` to the hall of fame, each at the complexity
# computed for it under `options`. Returns `hof`.
function Base.append!(hof::HallOfFame, pop::Population; options::AbstractOptions)
    foreach(pop.members) do member
        push!(hof, compute_complexity(member, options) => member)
    end
    return hof
end

# Merge `hof2` into `hof1` slot by slot and return `hof1`.
# Slots set in both are combined with `merge`; slots set only in `hof2` are
# copied over; slots not set in `hof2` leave `hof1` untouched.
function Base.merge!(hof1::HallOfFame, hof2::HallOfFame)
    for i in eachindex(hof1.elements, hof1.exists, hof2.elements, hof2.exists)
        # Nothing to take from hof2 at this complexity.
        hof2.exists[i] || continue
        if hof1.exists[i]
            hof1.elements[i] = merge(hof1.elements[i], hof2.elements[i])
        else
            hof1.elements[i] = copy(hof2.elements[i])
            hof1.exists[i] = true
        end
    end
    return hof1
end
# Merge two `ParetoSingle` elements, keeping the one with the lower score.
# Ties favour `el1`, which is returned as-is; `el2` is copied when it wins.
function Base.merge(el1::ParetoSingle, el2::ParetoSingle)
    # Remember: we want the MIN score (bad API choice, but we're stuck with it for now)
    if el1.member.score <= el2.member.score
        return el1
    end
    return copy(el2)
end
# Merge two `ParetoTopK` buckets into a new bucket with capacity `el1.k`.
#
# Both inputs are walked front to back and the member with the lower score is
# taken at each step (ties favour `el1`), until `el1.k` members have been
# collected or both inputs are exhausted. When one input runs out first, the
# remainder of the other is still consumed, so the result holds
# `min(el1.k, length(el1.members) + length(el2.members))` members.
function Base.merge(el1::ParetoTopK, el2::ParetoTopK)
    P = pop_member_type(typeof(el1))
    k = el1.k
    merged = sizehint!(P[], k + 1)
    i1, n1 = firstindex(el1.members), lastindex(el1.members)
    i2, n2 = firstindex(el2.members), lastindex(el2.members)
    while length(merged) < k && (i1 <= n1 || i2 <= n2)
        take_from_first = if i1 > n1
            false
        elseif i2 > n2
            true
        else
            el1.members[i1].score <= el2.members[i2].score
        end
        if take_from_first
            # TODO: Is it safe that we don't copy here? I think so; since we are merging
            # onto el1 (see `Base.merge!`), but perhaps someone could misuse this.
            push!(merged, el1.members[i1])
            i1 += 1
        else
            push!(merged, copy(el2.members[i2]))
            i2 += 1
        end
    end
    return ParetoTopK(merged, k)
end

"""
calculate_pareto_frontier(hallOfFame::HallOfFame{T,L,P}) where {T<:DATA_TYPE,L<:LOSS_TYPE}
calculate_pareto_frontier(hof::HallOfFame)

Compute the dominating pareto curve - each returned member must be better than all simpler equations.
"""
function calculate_pareto_frontier(hallOfFame::HallOfFame{T,L,N}) where {T,L,N}
# TODO - remove dataset from args.
P = PopMember{T,L,N}
# Dominating pareto curve - must be better than all simpler equations
function calculate_pareto_frontier(hof::HallOfFame)
P = pop_member_type(typeof(hof))
dominating = P[]
for size in eachindex(hallOfFame.members)
if !hallOfFame.exists[size]
for i in eachindex(hof.elements)
if !hof.exists[i]
continue
end
member = hallOfFame.members[size]
# We check if this member is better than all members which are smaller than it and
# also exist.
betterThanAllSmaller = true
for i in 1:(size - 1)
if !hallOfFame.exists[i]
element = hof.elements[i]
member = first(element)
# We check if this member is better than all
# elements which are smaller than it and also exist.
is_dominating = true
for j in 1:(i - 1)
if !hof.exists[j]
continue
end
simpler_member = hallOfFame.members[i]
if member.loss >= simpler_member.loss
betterThanAllSmaller = false
smaller_element = hof.elements[j]
smaller_member = first(smaller_element)
if member.loss >= smaller_member.loss
is_dominating = false
break
end
# TODO: Why are we using loss and not score? In other words,
# why are we _pushing_ based on score and not loss?
end
if betterThanAllSmaller
if is_dominating
push!(dominating, copy(member))
end
end
Expand Down
Loading
Loading