Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ensure known_object_data is assigned before deserialize is called again #17619

Merged
merged 2 commits into from
Jul 29, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 19 additions & 42 deletions base/clusterserialize.jl
Original file line number Diff line number Diff line change
@@ -1,53 +1,32 @@
# This file is a part of Julia. License is MIT: http://julialang.org/license

import .Serializer: known_object_data, object_number, serialize_cycle, deserialize_cycle, writetag,
__deserialized_types__, serialize_typename_body, deserialize_typename_body,
__deserialized_types__, serialize_typename, deserialize_typename,
TYPENAME_TAG, object_numbers

type ClusterSerializer{I<:IO} <: AbstractSerializer
io::I
counter::Int
table::ObjectIdDict

sent_objects::Dict{UInt64, Bool} # used by serialize (track objects sent)
sent_objects::Set{UInt64} # used by serialize (track objects sent)

ClusterSerializer(io::I) = new(io, 0, ObjectIdDict(), Dict())
ClusterSerializer(io::I) = new(io, 0, ObjectIdDict(), Set{UInt64}())
end
ClusterSerializer(io::IO) = ClusterSerializer{typeof(io)}(io)

function deserialize(s::ClusterSerializer, ::Type{TypeName})
number, full_body_sent = deserialize(s)
makenew = false
known = haskey(known_object_data, number)
full_body_sent = deserialize(s)
number = read(s.io, UInt64)
if !full_body_sent
if !known
error("Expected object in cache. Not found.")
else
tn = known_object_data[number]::TypeName
tn = get(known_object_data, number, nothing)::TypeName
if !haskey(object_numbers, tn)
# setup reverse mapping for serialize
object_numbers[tn] = number
end
deserialize_cycle(s, tn)
else
name = deserialize(s)
mod = deserialize(s)
if known
tn = known_object_data[number]::TypeName
elseif mod !== __deserialized_types__ && isdefined(mod, name)
tn = getfield(mod, name).name
# TODO: confirm somehow that the types match
#warn(mod, ".", name, " isdefined, need not have been serialized")
name = tn.name
mod = tn.module
else
name = gensym()
mod = __deserialized_types__
tn = ccall(:jl_new_typename_in, Ref{TypeName}, (Any, Any), name, mod)
makenew = true
end
end
deserialize_cycle(s, tn)
full_body_sent && deserialize_typename_body(s, tn, number, name, mod, makenew)
!known && (known_object_data[number] = tn)
if !haskey(object_numbers, tn)
object_numbers[tn] = number
tn = deserialize_typename(s, number)
end
return tn
end
Expand All @@ -57,15 +36,13 @@ function serialize(s::ClusterSerializer, t::TypeName)
writetag(s.io, TYPENAME_TAG)

identifier = object_number(t)
if !haskey(s.sent_objects, identifier)
serialize(s, (identifier, true))
serialize(s, t.name)
serialize(s, t.module)
serialize_typename_body(s, t)
s.sent_objects[identifier] = true
# println(t.module, ":", t.name, ", id:", identifier, " sent")
else
serialize(s, (identifier, false))
# println(t.module, ":", t.name, ", id:", identifier, " NOT sent")
send_whole = !(identifier in s.sent_objects)
serialize(s, send_whole)
write(s.io, identifier)
if send_whole
serialize_typename(s, t)
push!(s.sent_objects, identifier)
end
# println(t.module, ":", t.name, ", id:", identifier, send_whole ? " sent" : " NOT sent")
nothing
end
1 change: 1 addition & 0 deletions base/methodshow.jl
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ function show(io::IO, m::Method; kwtype::Nullable{DataType}=Nullable{DataType}()
decls = Any[(), ("...", "")]
elseif ft <: Function &&
isdefined(ft.name.module, ft.name.mt.name) &&
# TODO: more accurate test? (tn.name === "#" name)
ft == typeof(getfield(ft.name.module, ft.name.mt.name))
print(io, ft.name.mt.name)
elseif isa(ft, DataType) && is(ft.name, Type.name) && isleaftype(ft)
Expand Down
108 changes: 56 additions & 52 deletions base/serialize.jl
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,7 @@ end
function serialize(s::AbstractSerializer, g::GlobalRef)
writetag(s.io, GLOBALREF_TAG)
if g.mod === Main && isdefined(g.mod, g.name) && isconst(g.mod, g.name)
v = eval(g)
v = getfield(g.mod, g.name)
if isa(v, DataType) && v === v.name.primary && should_send_whole_type(s, v)
# handle references to types in Main by sending the whole type.
# needed to be able to send nested functions (#15451).
Expand All @@ -393,17 +393,16 @@ function serialize(s::AbstractSerializer, t::TypeName)
serialize_cycle(s, t) && return
writetag(s.io, TYPENAME_TAG)
write(s.io, object_number(t))
serialize(s, t.name)
serialize(s, t.module)
serialize_typename_body(s, t)
serialize_typename(s, t)
end

function serialize_typename_body(s::AbstractSerializer, t::TypeName)
function serialize_typename(s::AbstractSerializer, t::TypeName)
serialize(s, t.name)
serialize(s, t.names)
serialize(s, t.primary.super)
serialize(s, t.primary.parameters)
serialize(s, t.primary.types)
serialize(s, t.primary.size)
serialize(s, isdefined(t.primary, :instance))
serialize(s, t.primary.abstract)
serialize(s, t.primary.mutable)
serialize(s, t.primary.ninitialized)
Expand All @@ -419,27 +418,32 @@ function serialize_typename_body(s::AbstractSerializer, t::TypeName)
else
writetag(s.io, UNDEFREF_TAG)
end
nothing
end

# decide whether to send all data for a type (instead of just its name)
function should_send_whole_type(s, t::ANY)
function should_send_whole_type(s, t::DataType)
tn = t.name
if isdefined(tn, :mt)
# TODO improve somehow
# send whole type for anonymous functions in Main
fname = tn.mt.name
name = tn.mt.name
mod = tn.module
toplevel = isdefined(mod, fname) && isdefined(t, :instance) &&
getfield(mod, fname) === t.instance
ishidden = unsafe_load(unsafe_convert(Ptr{UInt8}, fname))==UInt8('#')
return mod === __deserialized_types__ || (mod === Main && (ishidden || !toplevel))
isanonfunction = mod === Main && # only Main
t.super === Function && # only Functions
unsafe_load(unsafe_convert(Ptr{UInt8}, tn.name)) == UInt8('#') && # hidden type
(!isdefined(mod, name) || t != typeof(getfield(mod, name))) # XXX: 95% accurate test for this being an inner function
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fwiw, this is only 95-ish% accurate because it mis-categorizes all inner functions as anonymous functions, while instead we actually want to only capture anonymous functions and all of their inner functions (but this information is nowhere available after lowering)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since it's still within Main, that's probably fine. Even if the outer functions are synchronized over all processors, the inner functions might have different mangled names if the processors executed different top-level statements in Main.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That seems suboptimal (the types won't be egal), although in that case it's still broken in the other direction (an inner function can be confused for an outer function in the case of code such as f() = g() = 2; const g = f())

fwiw, getting this right would also make anonymous and inner method show printing more readable. for now, I simply added a TODO comment there for revisiting later so that blame will indicate both files)

# TODO: more accurate test? (tn.name !== "#" name)
#TODO: iskw = startswith(tn.name, "#kw#") && ???
#TODO: iskw && return send-as-kwftype
return mod === __deserialized_types__ || isanonfunction
end
return false
end

# `type_itself` means we are serializing a type object. when it's false, we are
# sending the type tag part of some other object's representation.
function serialize_type_data(s, t::ANY, type_itself::Bool)
function serialize_type_data(s, t::DataType, type_itself::Bool)
whole = should_send_whole_type(s, t)
form = type_itself ? UInt8(0) : UInt8(1)
if whole
Expand Down Expand Up @@ -724,68 +728,66 @@ function deserialize(s::AbstractSerializer, ::Type{Union})
Union{types...}
end

module __deserialized_types__
end

module __deserialized_types__ end

function deserialize(s::AbstractSerializer, ::Type{TypeName})
# the deserialize_cycle call can be delayed, since neither
# Symbol nor Module will use the backref table
number = read(s.io, UInt64)
name = deserialize(s)
mod = deserialize(s)
if haskey(known_object_data, number)
tn = known_object_data[number]::TypeName
name = tn.name
mod = tn.module
makenew = false
elseif isdefined(mod, name)
tn = getfield(mod, name).name
# TODO: confirm somehow that the types match
name = tn.name
mod = tn.module
return deserialize_typename(s, number)
end

function deserialize_typename(s::AbstractSerializer, number)
name = deserialize(s)::Symbol
tn = get(known_object_data, number, nothing)
if tn !== nothing
makenew = false
else
name = gensym()
mod = __deserialized_types__
tn = ccall(:jl_new_typename_in, Ref{TypeName}, (Any, Any), name, mod)
# reuse the same name for the type, if possible, for nicer debugging
tn_name = isdefined(__deserialized_types__, name) ? gensym() : name
tn = ccall(:jl_new_typename_in, Ref{TypeName}, (Any, Any),
tn_name, __deserialized_types__)
makenew = true
known_object_data[number] = tn
end
if !haskey(object_numbers, tn)
# setup up reverse mapping for serialize
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"set up" here too, but can be fixed later

object_numbers[tn] = number
end
deserialize_cycle(s, tn)
deserialize_typename_body(s, tn, number, name, mod, makenew)
makenew && (known_object_data[number] = tn)
return tn
end

function deserialize_typename_body(s::AbstractSerializer, tn, number, name, mod, makenew)
names = deserialize(s)
super = deserialize(s)
parameters = deserialize(s)
types = deserialize(s)
size = deserialize(s)
abstr = deserialize(s)
mutable = deserialize(s)
ninitialized = deserialize(s)
names = deserialize(s)::SimpleVector
super = deserialize(s)::Type
parameters = deserialize(s)::SimpleVector
types = deserialize(s)::SimpleVector
has_instance = deserialize(s)::Bool
abstr = deserialize(s)::Bool
mutable = deserialize(s)::Bool
ninitialized = deserialize(s)::Int32

if makenew
tn.names = names
# TODO: there's an unhanded cycle in the dependency graph at this point:
# while deserializing super and/or types, we may have encountered
# tn.primary and throw UndefRefException before we get to this point
tn.primary = ccall(:jl_new_datatype, Any, (Any, Any, Any, Any, Any, Cint, Cint, Cint),
tn, super, parameters, names, types,
abstr, mutable, ninitialized)
ty = tn.primary
ccall(:jl_set_const, Void, (Any, Any, Any), mod, name, ty)
if !isdefined(ty,:instance)
if isempty(parameters) && !abstr && size == 0 && (!mutable || isempty(names))
setfield!(ty, :instance, ccall(:jl_new_struct, Any, (Any,Any...), ty))
end
ccall(:jl_set_const, Void, (Any, Any, Any), tn.module, tn.name, ty)
if has_instance && !isdefined(ty, :instance)
# use setfield! directly to avoid `fieldtype` lowering expecting to see a Singleton object already on ty
Core.setfield!(ty, :instance, ccall(:jl_new_struct, Any, (Any, Any...), ty))
end
end

tag = Int32(read(s.io, UInt8)::UInt8)
if tag != UNDEFREF_TAG
mtname = handle_deserialize(s, tag)
defs = deserialize(s)
maxa = deserialize(s)
maxa = deserialize(s)::Int
if makenew
tn.mt = ccall(:jl_new_method_table, Any, (Any, Any), name, mod)
tn.mt = ccall(:jl_new_method_table, Any, (Any, Any), name, tn.module)
tn.mt.name = mtname
tn.mt.defs = defs
tn.mt.max_args = maxa
Expand All @@ -798,6 +800,7 @@ function deserialize_typename_body(s::AbstractSerializer, tn, number, name, mod,
end
end
end
return tn::TypeName
end

function deserialize_datatype(s::AbstractSerializer)
Expand Down Expand Up @@ -845,7 +848,8 @@ function deserialize(s::AbstractSerializer, t::DataType)
return ccall(:jl_new_struct, Any, (Any,Any...), t)
elseif isbits(t)
if nf == 1
return ccall(:jl_new_struct, Any, (Any,Any...), t, deserialize(s))
f1 = deserialize(s)
return ccall(:jl_new_struct, Any, (Any,Any...), t, f1)
elseif nf == 2
f1 = deserialize(s)
f2 = deserialize(s)
Expand Down
28 changes: 28 additions & 0 deletions test/parallel_exec.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1023,6 +1023,34 @@ end
# issue #15451
@test remotecall_fetch(x->(y->2y)(x)+1, workers()[1], 3) == 7

# issue #16091
type T16091 end
wid = workers()[1]
@test try
remotecall_fetch(()->T16091, wid)
false
catch ex
((ex::RemoteException).captured::CapturedException).ex === UndefVarError(:T16091)
end
@test try
remotecall_fetch(identity, wid, T16091)
false
catch ex
((ex::RemoteException).captured::CapturedException).ex === UndefVarError(:T16091)
end

f16091a() = 1
remotecall_fetch(()->eval(:(f16091a() = 2)), wid)
@test remotecall_fetch(f16091a, wid) === 2
@test remotecall_fetch((myid)->remotecall_fetch(f16091a, myid), wid, myid()) === 1

# these will only heisen-fail, since it depends on the gensym counter collisions:
f16091b = () -> 1
remotecall_fetch(()->eval(:(f16091b = () -> 2)), wid)
@test remotecall_fetch(f16091b, 2) === 1
@test remotecall_fetch((myid)->remotecall_fetch(f16091b, myid), wid, myid()) === 2


# issue #16451
rng=RandomDevice()
retval = @parallel (+) for _ in 1:10
Expand Down
15 changes: 15 additions & 0 deletions test/serialize.jl
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,21 @@ create_serialization_stream() do s # Base generic function
@test deserialize(s)() === 1
end

# Anonymous Functions
create_serialization_stream() do s
local g() = :magic_token_anon_fun_test
serialize(s, g)
serialize(s, g)

seekstart(s)
local g2 = deserialize(s)
@test g2 !== g
@test g2() == :magic_token_anon_fun_test
@test g2() == :magic_token_anon_fun_test
@test deserialize(s) === g2
end


# Task
create_serialization_stream() do s # user-defined type array
f = () -> begin task_local_storage(:v, 2); return 1+1 end
Expand Down