From 51443876b880819222b7743c65ad81a11f307f44 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Mon, 25 Jul 2016 15:25:01 -0400 Subject: [PATCH 1/2] ensure known_object_data is assigned before deserialize is called again also reduce code duplication for easier maintenance ref #16091 --- base/clusterserialize.jl | 52 +++++++++++++--------------------------- base/serialize.jl | 32 +++++++++++++++---------- 2 files changed, 37 insertions(+), 47 deletions(-) diff --git a/base/clusterserialize.jl b/base/clusterserialize.jl index f8505ca9b7ae5..e265c34cebd10 100644 --- a/base/clusterserialize.jl +++ b/base/clusterserialize.jl @@ -9,45 +9,24 @@ type ClusterSerializer{I<:IO} <: AbstractSerializer counter::Int table::ObjectIdDict - sent_objects::Dict{UInt64, Bool} # used by serialize (track objects sent) + sent_objects::Set{UInt64} # used by serialize (track objects sent) - ClusterSerializer(io::I) = new(io, 0, ObjectIdDict(), Dict()) + ClusterSerializer(io::I) = new(io, 0, ObjectIdDict(), Set{UInt64}()) end ClusterSerializer(io::IO) = ClusterSerializer{typeof(io)}(io) function deserialize(s::ClusterSerializer, ::Type{TypeName}) - number, full_body_sent = deserialize(s) - makenew = false - known = haskey(known_object_data, number) + full_body_sent = deserialize(s) if !full_body_sent - if !known - error("Expected object in cache. Not found.") - else - tn = known_object_data[number]::TypeName + number = read(s.io, UInt64) + tn = get(known_object_data, number, nothing)::TypeName + if !haskey(object_numbers, tn) + # setup reverse mapping for serialize + object_numbers[tn] = number end + deserialize_cycle(s, tn) else - name = deserialize(s) - mod = deserialize(s) - if known - tn = known_object_data[number]::TypeName - elseif mod !== __deserialized_types__ && isdefined(mod, name) - tn = getfield(mod, name).name - # TODO: confirm somehow that the types match - #warn(mod, ".", name, " isdefined, need not have been serialized") - name = tn.name - mod = tn.module - else - name = gensym() - mod = __deserialized_types__ - tn = ccall(:jl_new_typename_in, Ref{TypeName}, (Any, Any), name, mod) - makenew = true - end - end - deserialize_cycle(s, tn) - full_body_sent && deserialize_typename_body(s, tn, number, name, mod, makenew) - !known && (known_object_data[number] = tn) - if !haskey(object_numbers, tn) - object_numbers[tn] = number + tn = invoke(deserialize, (AbstractSerializer, Type{TypeName}), s, TypeName) end return tn end @@ -57,15 +36,18 @@ function serialize(s::ClusterSerializer, t::TypeName) writetag(s.io, TYPENAME_TAG) identifier = object_number(t) - if !haskey(s.sent_objects, identifier) - serialize(s, (identifier, true)) + if !(identifier in s.sent_objects) + serialize(s, true) + write(s.io, identifier) serialize(s, t.name) serialize(s, t.module) serialize_typename_body(s, t) - s.sent_objects[identifier] = true + push!(s.sent_objects, identifier) # println(t.module, ":", t.name, ", id:", identifier, " sent") else - serialize(s, (identifier, false)) + serialize(s, false) + write(s.io, identifier) # println(t.module, ":", t.name, ", id:", identifier, " NOT sent") end + nothing end diff --git a/base/serialize.jl b/base/serialize.jl index f0e6d938fba49..d0d5e0cbe9d26 100644 --- a/base/serialize.jl +++ b/base/serialize.jl @@ -419,6 +419,7 @@ function serialize_typename_body(s::AbstractSerializer, t::TypeName) else writetag(s.io, UNDEFREF_TAG) end + nothing end # decide whether to send all data for a type (instead of just its name) @@ -729,29 +730,32 @@ end function deserialize(s::AbstractSerializer, ::Type{TypeName}) + # the deserialize_cycle call can be delayed, since neither + # Symbol nor Module will use the backref table number = read(s.io, UInt64) - name = deserialize(s) - mod = deserialize(s) - if haskey(known_object_data, number) - tn = known_object_data[number]::TypeName - name = tn.name - mod = tn.module + name = deserialize(s)::Symbol + mod = deserialize(s)::Module + tn = get(known_object_data, number, nothing) + if tn !== nothing makenew = false - elseif isdefined(mod, name) + elseif mod !== __deserialized_types__ && isdefined(mod, name) tn = getfield(mod, name).name # TODO: confirm somehow that the types match - name = tn.name - mod = tn.module makenew = false + known_object_data[number] = tn else name = gensym() mod = __deserialized_types__ tn = ccall(:jl_new_typename_in, Ref{TypeName}, (Any, Any), name, mod) makenew = true + known_object_data[number] = tn + end + if !haskey(object_numbers, tn) + # setup up reverse mapping for serialize + object_numbers[tn] = number end deserialize_cycle(s, tn) deserialize_typename_body(s, tn, number, name, mod, makenew) - makenew && (known_object_data[number] = tn) return tn end @@ -767,14 +771,18 @@ function deserialize_typename_body(s::AbstractSerializer, tn, number, name, mod, if makenew tn.names = names + # TODO: there's an unhanded cycle in the dependency graph at this point: + # while deserializing super and/or types, we may have encountered + # tn.primary and throw UndefRefException before we get to this point tn.primary = ccall(:jl_new_datatype, Any, (Any, Any, Any, Any, Any, Cint, Cint, Cint), tn, super, parameters, names, types, abstr, mutable, ninitialized) ty = tn.primary ccall(:jl_set_const, Void, (Any, Any, Any), mod, name, ty) - if !isdefined(ty,:instance) + if !isdefined(ty, :instance) if isempty(parameters) && !abstr && size == 0 && (!mutable || isempty(names)) - setfield!(ty, :instance, ccall(:jl_new_struct, Any, (Any,Any...), ty)) + # use setfield! directly to avoid `fieldtype` lowering expecting to see a Singleton object already on ty + Core.setfield!(ty, :instance, ccall(:jl_new_struct, Any, (Any, Any...), ty)) end end end From d584142f1788f03e943d1e4f1f56018d57d4d29e Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Thu, 28 Jul 2016 14:29:48 -0400 Subject: [PATCH 2/2] fix anonymous function serialization logic should_send_whole_type was sending too many regular type that weren't functions and deserialize was trying to hide that error, but was then not deserializing things that were anonymous functions fix #16091 --- base/clusterserialize.jl | 23 ++++------- base/methodshow.jl | 1 + base/serialize.jl | 88 +++++++++++++++++++--------------------- test/parallel_exec.jl | 28 +++++++++++++ test/serialize.jl | 15 +++++++ 5 files changed, 95 insertions(+), 60 deletions(-) diff --git a/base/clusterserialize.jl b/base/clusterserialize.jl index e265c34cebd10..4f42a862c3ba9 100644 --- a/base/clusterserialize.jl +++ b/base/clusterserialize.jl @@ -1,7 +1,7 @@ # This file is a part of Julia. License is MIT: http://julialang.org/license import .Serializer: known_object_data, object_number, serialize_cycle, deserialize_cycle, writetag, - __deserialized_types__, serialize_typename_body, deserialize_typename_body, + __deserialized_types__, serialize_typename, deserialize_typename, TYPENAME_TAG, object_numbers type ClusterSerializer{I<:IO} <: AbstractSerializer @@ -17,8 +17,8 @@ ClusterSerializer(io::IO) = ClusterSerializer{typeof(io)}(io) function deserialize(s::ClusterSerializer, ::Type{TypeName}) full_body_sent = deserialize(s) + number = read(s.io, UInt64) if !full_body_sent - number = read(s.io, UInt64) tn = get(known_object_data, number, nothing)::TypeName if !haskey(object_numbers, tn) # setup reverse mapping for serialize @@ -26,7 +26,7 @@ function deserialize(s::ClusterSerializer, ::Type{TypeName}) end deserialize_cycle(s, tn) else - tn = invoke(deserialize, (AbstractSerializer, Type{TypeName}), s, TypeName) + tn = deserialize_typename(s, number) end return tn end @@ -36,18 +36,13 @@ function serialize(s::ClusterSerializer, t::TypeName) writetag(s.io, TYPENAME_TAG) identifier = object_number(t) - if !(identifier in s.sent_objects) - serialize(s, true) - write(s.io, identifier) - serialize(s, t.name) - serialize(s, t.module) - serialize_typename_body(s, t) + send_whole = !(identifier in s.sent_objects) + serialize(s, send_whole) + write(s.io, identifier) + if send_whole + serialize_typename(s, t) push!(s.sent_objects, identifier) -# println(t.module, ":", t.name, ", id:", identifier, " sent") - else - serialize(s, false) - write(s.io, identifier) -# println(t.module, ":", t.name, ", id:", identifier, " NOT sent") end +# println(t.module, ":", t.name, ", id:", identifier, send_whole ? " sent" : " NOT sent") nothing end diff --git a/base/methodshow.jl b/base/methodshow.jl index 79737b4f596f4..06a24df9377d3 100644 --- a/base/methodshow.jl +++ b/base/methodshow.jl @@ -79,6 +79,7 @@ function show(io::IO, m::Method; kwtype::Nullable{DataType}=Nullable{DataType}() decls = Any[(), ("...", "")] elseif ft <: Function && isdefined(ft.name.module, ft.name.mt.name) && + # TODO: more accurate test? (tn.name === "#" name) ft == typeof(getfield(ft.name.module, ft.name.mt.name)) print(io, ft.name.mt.name) elseif isa(ft, DataType) && is(ft.name, Type.name) && isleaftype(ft) diff --git a/base/serialize.jl b/base/serialize.jl index d0d5e0cbe9d26..39af8a55a7607 100644 --- a/base/serialize.jl +++ b/base/serialize.jl @@ -374,7 +374,7 @@ end function serialize(s::AbstractSerializer, g::GlobalRef) writetag(s.io, GLOBALREF_TAG) if g.mod === Main && isdefined(g.mod, g.name) && isconst(g.mod, g.name) - v = eval(g) + v = getfield(g.mod, g.name) if isa(v, DataType) && v === v.name.primary && should_send_whole_type(s, v) # handle references to types in Main by sending the whole type. # needed to be able to send nested functions (#15451). @@ -393,17 +393,16 @@ function serialize(s::AbstractSerializer, t::TypeName) serialize_cycle(s, t) && return writetag(s.io, TYPENAME_TAG) write(s.io, object_number(t)) - serialize(s, t.name) - serialize(s, t.module) - serialize_typename_body(s, t) + serialize_typename(s, t) end -function serialize_typename_body(s::AbstractSerializer, t::TypeName) +function serialize_typename(s::AbstractSerializer, t::TypeName) + serialize(s, t.name) serialize(s, t.names) serialize(s, t.primary.super) serialize(s, t.primary.parameters) serialize(s, t.primary.types) - serialize(s, t.primary.size) + serialize(s, isdefined(t.primary, :instance)) serialize(s, t.primary.abstract) serialize(s, t.primary.mutable) serialize(s, t.primary.ninitialized) @@ -423,24 +422,28 @@ function serialize_typename_body(s::AbstractSerializer, t::TypeName) end # decide whether to send all data for a type (instead of just its name) -function should_send_whole_type(s, t::ANY) +function should_send_whole_type(s, t::DataType) tn = t.name if isdefined(tn, :mt) # TODO improve somehow # send whole type for anonymous functions in Main - fname = tn.mt.name + name = tn.mt.name mod = tn.module - toplevel = isdefined(mod, fname) && isdefined(t, :instance) && - getfield(mod, fname) === t.instance - ishidden = unsafe_load(unsafe_convert(Ptr{UInt8}, fname))==UInt8('#') - return mod === __deserialized_types__ || (mod === Main && (ishidden || !toplevel)) + isanonfunction = mod === Main && # only Main + t.super === Function && # only Functions + unsafe_load(unsafe_convert(Ptr{UInt8}, tn.name)) == UInt8('#') && # hidden type + (!isdefined(mod, name) || t != typeof(getfield(mod, name))) # XXX: 95% accurate test for this being an inner function + # TODO: more accurate test? (tn.name !== "#" name) + #TODO: iskw = startswith(tn.name, "#kw#") && ??? + #TODO: iskw && return send-as-kwftype + return mod === __deserialized_types__ || isanonfunction end return false end # `type_itself` means we are serializing a type object. when it's false, we are # sending the type tag part of some other object's representation. -function serialize_type_data(s, t::ANY, type_itself::Bool) +function serialize_type_data(s, t::DataType, type_itself::Bool) whole = should_send_whole_type(s, t) form = type_itself ? UInt8(0) : UInt8(1) if whole @@ -725,28 +728,25 @@ function deserialize(s::AbstractSerializer, ::Type{Union}) Union{types...} end -module __deserialized_types__ -end - +module __deserialized_types__ end function deserialize(s::AbstractSerializer, ::Type{TypeName}) # the deserialize_cycle call can be delayed, since neither # Symbol nor Module will use the backref table number = read(s.io, UInt64) + return deserialize_typename(s, number) +end + +function deserialize_typename(s::AbstractSerializer, number) name = deserialize(s)::Symbol - mod = deserialize(s)::Module tn = get(known_object_data, number, nothing) if tn !== nothing makenew = false - elseif mod !== __deserialized_types__ && isdefined(mod, name) - tn = getfield(mod, name).name - # TODO: confirm somehow that the types match - makenew = false - known_object_data[number] = tn else - name = gensym() - mod = __deserialized_types__ - tn = ccall(:jl_new_typename_in, Ref{TypeName}, (Any, Any), name, mod) + # reuse the same name for the type, if possible, for nicer debugging + tn_name = isdefined(__deserialized_types__, name) ? gensym() : name + tn = ccall(:jl_new_typename_in, Ref{TypeName}, (Any, Any), + tn_name, __deserialized_types__) makenew = true known_object_data[number] = tn end @@ -755,19 +755,15 @@ function deserialize(s::AbstractSerializer, ::Type{TypeName}) object_numbers[tn] = number end deserialize_cycle(s, tn) - deserialize_typename_body(s, tn, number, name, mod, makenew) - return tn -end -function deserialize_typename_body(s::AbstractSerializer, tn, number, name, mod, makenew) - names = deserialize(s) - super = deserialize(s) - parameters = deserialize(s) - types = deserialize(s) - size = deserialize(s) - abstr = deserialize(s) - mutable = deserialize(s) - ninitialized = deserialize(s) + names = deserialize(s)::SimpleVector + super = deserialize(s)::Type + parameters = deserialize(s)::SimpleVector + types = deserialize(s)::SimpleVector + has_instance = deserialize(s)::Bool + abstr = deserialize(s)::Bool + mutable = deserialize(s)::Bool + ninitialized = deserialize(s)::Int32 if makenew tn.names = names @@ -778,12 +774,10 @@ function deserialize_typename_body(s::AbstractSerializer, tn, number, name, mod, tn, super, parameters, names, types, abstr, mutable, ninitialized) ty = tn.primary - ccall(:jl_set_const, Void, (Any, Any, Any), mod, name, ty) - if !isdefined(ty, :instance) - if isempty(parameters) && !abstr && size == 0 && (!mutable || isempty(names)) - # use setfield! directly to avoid `fieldtype` lowering expecting to see a Singleton object already on ty - Core.setfield!(ty, :instance, ccall(:jl_new_struct, Any, (Any, Any...), ty)) - end + ccall(:jl_set_const, Void, (Any, Any, Any), tn.module, tn.name, ty) + if has_instance && !isdefined(ty, :instance) + # use setfield! directly to avoid `fieldtype` lowering expecting to see a Singleton object already on ty + Core.setfield!(ty, :instance, ccall(:jl_new_struct, Any, (Any, Any...), ty)) end end @@ -791,9 +785,9 @@ function deserialize_typename_body(s::AbstractSerializer, tn, number, name, mod, if tag != UNDEFREF_TAG mtname = handle_deserialize(s, tag) defs = deserialize(s) - maxa = deserialize(s) + maxa = deserialize(s)::Int if makenew - tn.mt = ccall(:jl_new_method_table, Any, (Any, Any), name, mod) + tn.mt = ccall(:jl_new_method_table, Any, (Any, Any), name, tn.module) tn.mt.name = mtname tn.mt.defs = defs tn.mt.max_args = maxa @@ -806,6 +800,7 @@ function deserialize_typename_body(s::AbstractSerializer, tn, number, name, mod, end end end + return tn::TypeName end function deserialize_datatype(s::AbstractSerializer) @@ -853,7 +848,8 @@ function deserialize(s::AbstractSerializer, t::DataType) return ccall(:jl_new_struct, Any, (Any,Any...), t) elseif isbits(t) if nf == 1 - return ccall(:jl_new_struct, Any, (Any,Any...), t, deserialize(s)) + f1 = deserialize(s) + return ccall(:jl_new_struct, Any, (Any,Any...), t, f1) elseif nf == 2 f1 = deserialize(s) f2 = deserialize(s) diff --git a/test/parallel_exec.jl b/test/parallel_exec.jl index 81d3d47a20507..6718c99a21eb0 100644 --- a/test/parallel_exec.jl +++ b/test/parallel_exec.jl @@ -1023,6 +1023,34 @@ end # issue #15451 @test remotecall_fetch(x->(y->2y)(x)+1, workers()[1], 3) == 7 +# issue #16091 +type T16091 end +wid = workers()[1] +@test try + remotecall_fetch(()->T16091, wid) + false +catch ex + ((ex::RemoteException).captured::CapturedException).ex === UndefVarError(:T16091) +end +@test try + remotecall_fetch(identity, wid, T16091) + false +catch ex + ((ex::RemoteException).captured::CapturedException).ex === UndefVarError(:T16091) +end + +f16091a() = 1 +remotecall_fetch(()->eval(:(f16091a() = 2)), wid) +@test remotecall_fetch(f16091a, wid) === 2 +@test remotecall_fetch((myid)->remotecall_fetch(f16091a, myid), wid, myid()) === 1 + +# these will only heisen-fail, since it depends on the gensym counter collisions: +f16091b = () -> 1 +remotecall_fetch(()->eval(:(f16091b = () -> 2)), wid) +@test remotecall_fetch(f16091b, 2) === 1 +@test remotecall_fetch((myid)->remotecall_fetch(f16091b, myid), wid, myid()) === 2 + + # issue #16451 rng=RandomDevice() retval = @parallel (+) for _ in 1:10 diff --git a/test/serialize.jl b/test/serialize.jl index d36622738d950..68c41ab89fc54 100644 --- a/test/serialize.jl +++ b/test/serialize.jl @@ -278,6 +278,21 @@ create_serialization_stream() do s # Base generic function @test deserialize(s)() === 1 end +# Anonymous Functions +create_serialization_stream() do s + local g() = :magic_token_anon_fun_test + serialize(s, g) + serialize(s, g) + + seekstart(s) + local g2 = deserialize(s) + @test g2 !== g + @test g2() == :magic_token_anon_fun_test + @test g2() == :magic_token_anon_fun_test + @test deserialize(s) === g2 +end + + # Task create_serialization_stream() do s # user-defined type array f = () -> begin task_local_storage(:v, 2); return 1+1 end