From b55498c95bc252636b395ca30eded43f57fe25cb Mon Sep 17 00:00:00 2001 From: d-netto Date: Fri, 7 Jul 2023 18:52:33 -0300 Subject: [PATCH 1/4] relax assertion involving pg->nold to reflect that it may be a bit inaccurate with parallel marking --- src/gc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/gc.c b/src/gc.c index cb86d5d4535a7..756985803eb7f 100644 --- a/src/gc.c +++ b/src/gc.c @@ -1377,7 +1377,12 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo // For quick sweep, we might be able to skip the page if the page doesn't // have any young live cell before marking. if (!sweep_full && !pg->has_young) { - assert(!prev_sweep_full || pg->prev_nold >= pg->nold); + // note that `pg->nold` may not be accurate with multithreaded marking since + // two threads may race when trying to set the mark bit in `gc_try_setmark_tag`. + // We're basically losing a bit of precision in the sweep phase at the cost of + // making the mark phase considerably cheaper. + // See issue #50419 + assert(jl_n_markthreads != 0 || !prev_sweep_full || pg->prev_nold >= pg->nold); if (!prev_sweep_full || pg->prev_nold == pg->nold) { // the position of the freelist begin/end in this page // is stored in its metadata From 49e467f167df0475ce701d4110ddd8fa35c1f72a Mon Sep 17 00:00:00 2001 From: d-netto Date: Fri, 7 Jul 2023 19:26:27 -0300 Subject: [PATCH 2/4] fixup & make increment to page->nold non-atomic --- src/gc.c | 24 +++++++++++++----------- test.jl | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 11 deletions(-) create mode 100644 test.jl diff --git a/src/gc.c b/src/gc.c index 756985803eb7f..25ed735c2b01d 100644 --- a/src/gc.c +++ b/src/gc.c @@ -854,7 +854,7 @@ STATIC_INLINE void gc_setmark_pool_(jl_ptls_t ptls, jl_taggedvalue_t *o, if (mark_mode == GC_OLD_MARKED) { ptls->gc_cache.perm_scanned_bytes += page->osize; static_assert(sizeof(_Atomic(uint16_t)) == sizeof(page->nold), ""); - jl_atomic_fetch_add_relaxed((_Atomic(uint16_t)*)&page->nold, 1); + page->nold++; } else { ptls->gc_cache.scanned_bytes += page->osize; @@ -1382,17 +1382,19 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo // We're basically losing a bit of precision in the sweep phase at the cost of // making the mark phase considerably cheaper. // See issue #50419 - assert(jl_n_markthreads != 0 || !prev_sweep_full || pg->prev_nold >= pg->nold); - if (!prev_sweep_full || pg->prev_nold == pg->nold) { - // the position of the freelist begin/end in this page - // is stored in its metadata - if (pg->fl_begin_offset != (uint16_t)-1) { - *pfl = page_pfl_beg(pg); - pfl = (jl_taggedvalue_t**)page_pfl_end(pg); + if (jl_n_markthreads == 0) { + assert(!prev_sweep_full || pg->prev_nold >= pg->nold); + if (!prev_sweep_full || pg->prev_nold == pg->nold) { + // the position of the freelist begin/end in this page + // is stored in its metadata + if (pg->fl_begin_offset != (uint16_t)-1) { + *pfl = page_pfl_beg(pg); + pfl = (jl_taggedvalue_t**)page_pfl_end(pg); + } + freedall = 0; + nfree = pg->nfree; + goto done; } - freedall = 0; - nfree = pg->nfree; - goto done; } } diff --git a/test.jl b/test.jl new file mode 100644 index 0000000000000..896f47fa4c9c7 --- /dev/null +++ b/test.jl @@ -0,0 +1,54 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +module BinaryTreeMutable + +# Adopted from +# https://benchmarksgame-team.pages.debian.net/benchmarksgame/description/binarytrees.html#binarytrees + +using Base.Threads +using Printf + +mutable struct Node + l::Union{Nothing, Node} + r::Union{Nothing, Node} +end + +function make(n::Int) + return n === 0 ? Node(nothing, nothing) : Node(make(n-1), make(n-1)) +end + +function check(node::Node) + return 1 + (node.l === nothing ? 0 : check(node.l) + check(node.r)) +end + +function binary_trees(io, n::Int) + @printf io "stretch tree of depth %jd\t check: %jd\n" n+1 check(make(n+1)) + + long_tree = make(n) + minDepth = 4 + resultSize = div((n - minDepth), 2) + 1 + results = Vector{String}(undef, resultSize) + Threads.@threads for depth in minDepth:2:n + c = 0 + niter = 1 << (n - depth + minDepth) + for _ in 1:niter + c += check(make(depth)) + end + index = div((depth - minDepth),2) + 1 + results[index] = @sprintf "%jd\t trees of depth %jd\t check: %jd\n" niter depth c + end + + for i in results + write(io, i) + end + + @printf io "long lived tree of depth %jd\t check: %jd\n" n check(long_tree) +end + +end #module + +using .BinaryTreeMutable + +# Memory usage is 466MB +BinaryTreeMutable.binary_trees(devnull, 16) +GC.gc() From 4396e396a2554ada15ae45a4a99d11626814faee Mon Sep 17 00:00:00 2001 From: d-netto Date: Fri, 7 Jul 2023 19:27:56 -0300 Subject: [PATCH 3/4] delete spurious file --- test.jl | 54 ------------------------------------------------------ 1 file changed, 54 deletions(-) delete mode 100644 test.jl diff --git a/test.jl b/test.jl deleted file mode 100644 index 896f47fa4c9c7..0000000000000 --- a/test.jl +++ /dev/null @@ -1,54 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -module BinaryTreeMutable - -# Adopted from -# https://benchmarksgame-team.pages.debian.net/benchmarksgame/description/binarytrees.html#binarytrees - -using Base.Threads -using Printf - -mutable struct Node - l::Union{Nothing, Node} - r::Union{Nothing, Node} -end - -function make(n::Int) - return n === 0 ? Node(nothing, nothing) : Node(make(n-1), make(n-1)) -end - -function check(node::Node) - return 1 + (node.l === nothing ? 0 : check(node.l) + check(node.r)) -end - -function binary_trees(io, n::Int) - @printf io "stretch tree of depth %jd\t check: %jd\n" n+1 check(make(n+1)) - - long_tree = make(n) - minDepth = 4 - resultSize = div((n - minDepth), 2) + 1 - results = Vector{String}(undef, resultSize) - Threads.@threads for depth in minDepth:2:n - c = 0 - niter = 1 << (n - depth + minDepth) - for _ in 1:niter - c += check(make(depth)) - end - index = div((depth - minDepth),2) + 1 - results[index] = @sprintf "%jd\t trees of depth %jd\t check: %jd\n" niter depth c - end - - for i in results - write(io, i) - end - - @printf io "long lived tree of depth %jd\t check: %jd\n" n check(long_tree) -end - -end #module - -using .BinaryTreeMutable - -# Memory usage is 466MB -BinaryTreeMutable.binary_trees(devnull, 16) -GC.gc() From d1ab7ca8a1001948e3960d11aac1ca18f231d45f Mon Sep 17 00:00:00 2001 From: d-netto Date: Sat, 8 Jul 2023 00:56:50 -0300 Subject: [PATCH 4/4] invert order in if statement --- src/gc.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/gc.c b/src/gc.c index 25ed735c2b01d..1b14452603239 100644 --- a/src/gc.c +++ b/src/gc.c @@ -1374,15 +1374,15 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo nfree = (GC_PAGE_SZ - GC_PAGE_OFFSET) / osize; goto done; } - // For quick sweep, we might be able to skip the page if the page doesn't - // have any young live cell before marking. - if (!sweep_full && !pg->has_young) { - // note that `pg->nold` may not be accurate with multithreaded marking since - // two threads may race when trying to set the mark bit in `gc_try_setmark_tag`. - // We're basically losing a bit of precision in the sweep phase at the cost of - // making the mark phase considerably cheaper. - // See issue #50419 - if (jl_n_markthreads == 0) { + // note that `pg->nold` may not be accurate with multithreaded marking since + // two threads may race when trying to set the mark bit in `gc_try_setmark_tag`. + // We're basically losing a bit of precision in the sweep phase at the cost of + // making the mark phase considerably cheaper. + // See issue #50419 + if (jl_n_markthreads == 0) { + // For quick sweep, we might be able to skip the page if the page doesn't + // have any young live cell before marking. + if (!sweep_full && !pg->has_young) { assert(!prev_sweep_full || pg->prev_nold >= pg->nold); if (!prev_sweep_full || pg->prev_nold == pg->nold) { // the position of the freelist begin/end in this page