From c02a568bded74b3570775b3ea18ff59d04818277 Mon Sep 17 00:00:00 2001 From: Dominique Luna Date: Sat, 2 Mar 2024 13:34:21 -0500 Subject: [PATCH 1/9] fix #817 finding an optimal nest for a large list > 12 or so starts to take a noticeably long time - O(n^2) scaling. The bulk of the time was in `find_all_segment_splits`. _backtrack is refactored so that the `permutations` function is no longer needed and we prune most of the search tree. --- Project.toml | 2 -- src/JuliaFormatter.jl | 1 - src/nest_utils.jl | 24 ++++++++++++------------ 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/Project.toml b/Project.toml index 658c940ea..34870bcb6 100644 --- a/Project.toml +++ b/Project.toml @@ -5,7 +5,6 @@ version = "1.0.50" [deps] CSTParser = "00ebfdb7-1f24-5e51-bd34-a7502290713f" -Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" CommonMark = "a80b9123-70ca-4bc0-993e-6e3bcb318db6" DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" Glob = "c27321d9-0574-5035-807b-f59d2c89b15c" @@ -15,7 +14,6 @@ Tokenize = "0796e94c-ce3b-5d07-9a54-7f471281c624" [compat] CSTParser = "^3.4.0" -Combinatorics = "1" CommonMark = "0.5, 0.6, 0.7, 0.8" DataStructures = "0.17, 0.18" Glob = "1.3" diff --git a/src/JuliaFormatter.jl b/src/JuliaFormatter.jl index a166ae508..97dd6c49d 100644 --- a/src/JuliaFormatter.jl +++ b/src/JuliaFormatter.jl @@ -11,7 +11,6 @@ using Pkg.TOML: parsefile using Glob import CommonMark: block_modifier import Base: get, pairs -using Combinatorics: permutations using CommonMark: AdmonitionRule, CodeBlock, diff --git a/src/nest_utils.jl b/src/nest_utils.jl index dc458b879..5ddff2837 100644 --- a/src/nest_utils.jl +++ b/src/nest_utils.jl @@ -175,8 +175,9 @@ function nest_if_over_margin!( return false end -function find_all_segment_splits(n::Int, k::Int) +function find_all_segment_splits2(n::Int, k::Int, max_margin::Int) res = Vector{Int}[] + # n = size(dp, 1) if n == k return [fill(1, k)] @@ -190,25 +191,23 @@ function find_all_segment_splits(n::Int, k::Int) 
push!(res, t) end return + elseif current_sum >= n + return end - start_val = isempty(t) ? 1 : last(t) - max_val = n - current_sum - (k - length(t) - 1) + start_val = 1 + max_val = n - k + 1 for i in start_val:min(n, max_val) _backtrack([t; i], current_sum + i) end end - _backtrack(Int[], 0) - all_splits = Vector{Int}[] - for r in res - for c in unique(permutations(r)) - push!(all_splits, c) - end + for i in 1:(n - k + 1) + _backtrack([i], i) end - return all_splits + return res end """ @@ -285,7 +284,7 @@ function find_optimal_nest_placeholders( end end - # @info "" dp placeholder_inds + @info "" dp placeholder_inds N = size(dp, 1) @@ -307,7 +306,7 @@ function find_optimal_nest_placeholders( ranges end - all_splits = find_all_segment_splits(N, s) + @time all_splits = find_all_segment_splits(N, s, max_margin) best_split = UnitRange{Int}[] min_diff = 1_000_000 # big number! @@ -330,6 +329,7 @@ function find_optimal_nest_placeholders( segments = Tuple{Int,Int}[] for s in 1:N segments = find_best_segments(s) + @info "" segments fits = true for (i, s) in enumerate(segments) if i == 1 From 45117350e232b72e05a965aa9a7a723dc41b01ad Mon Sep 17 00:00:00 2001 From: Dominique Luna Date: Sat, 2 Mar 2024 13:53:26 -0500 Subject: [PATCH 2/9] bug --- src/nest_utils.jl | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/nest_utils.jl b/src/nest_utils.jl index 5ddff2837..e39b51adf 100644 --- a/src/nest_utils.jl +++ b/src/nest_utils.jl @@ -175,7 +175,7 @@ function nest_if_over_margin!( return false end -function find_all_segment_splits2(n::Int, k::Int, max_margin::Int) +function find_all_segment_splits(n::Int, k::Int, max_margin::Int) res = Vector{Int}[] # n = size(dp, 1) @@ -203,7 +203,7 @@ function find_all_segment_splits2(n::Int, k::Int, max_margin::Int) end end - for i in 1:(n - k + 1) + for i in 1:(n-k+1) _backtrack([i], i) end @@ -306,7 +306,7 @@ function find_optimal_nest_placeholders( ranges end - @time all_splits = find_all_segment_splits(N, s, 
max_margin) + all_splits = find_all_segment_splits(N, s, max_margin) best_split = UnitRange{Int}[] min_diff = 1_000_000 # big number! @@ -329,7 +329,6 @@ function find_optimal_nest_placeholders( segments = Tuple{Int,Int}[] for s in 1:N segments = find_best_segments(s) - @info "" segments fits = true for (i, s) in enumerate(segments) if i == 1 From 723571f9f71687e0e831d3faf64ba504bcaa3187 Mon Sep 17 00:00:00 2001 From: Dominique Luna Date: Sat, 2 Mar 2024 13:55:14 -0500 Subject: [PATCH 3/9] upper limit --- src/nest_utils.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nest_utils.jl b/src/nest_utils.jl index e39b51adf..665032f13 100644 --- a/src/nest_utils.jl +++ b/src/nest_utils.jl @@ -219,7 +219,7 @@ function find_optimal_nest_placeholders( max_margin::Int, )::Vector{Int} placeholder_inds = findall(n -> n.typ === PLACEHOLDER, fst.nodes) - if length(placeholder_inds) <= 1 + if length(placeholder_inds) <= 1 || length(placeholder_inds) >= 20 return placeholder_inds end newline_inds = findall(n -> n.typ === NEWLINE, fst.nodes) From 4a2a4e1a592db665856dee8296d3fa82bd4f231b Mon Sep 17 00:00:00 2001 From: Dominique Luna Date: Sat, 2 Mar 2024 13:55:28 -0500 Subject: [PATCH 4/9] make it 40 --- src/nest_utils.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nest_utils.jl b/src/nest_utils.jl index 665032f13..e034708b3 100644 --- a/src/nest_utils.jl +++ b/src/nest_utils.jl @@ -219,7 +219,7 @@ function find_optimal_nest_placeholders( max_margin::Int, )::Vector{Int} placeholder_inds = findall(n -> n.typ === PLACEHOLDER, fst.nodes) - if length(placeholder_inds) <= 1 || length(placeholder_inds) >= 20 + if length(placeholder_inds) <= 1 || length(placeholder_inds) >= 40 return placeholder_inds end newline_inds = findall(n -> n.typ === NEWLINE, fst.nodes) From fbea23a3b8a438da9032105e0885bb38255e09fd Mon Sep 17 00:00:00 2001 From: Dominique Luna Date: Sat, 2 Mar 2024 14:22:03 -0500 Subject: [PATCH 5/9] v bump --- Project.toml | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 34870bcb6..655e8fdcb 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "JuliaFormatter" uuid = "98e50ef6-434e-11e9-1051-2b60c6c9e899" authors = ["Dominique Luna "] -version = "1.0.50" +version = "1.0.51" [deps] CSTParser = "00ebfdb7-1f24-5e51-bd34-a7502290713f" From 9a3d40b952c1c108f15c24dbbc322369e15cc47e Mon Sep 17 00:00:00 2001 From: Dominique Luna Date: Sat, 2 Mar 2024 17:24:05 -0500 Subject: [PATCH 6/9] ok --- src/nest_utils.jl | 20 ++++++++++++-------- test/issues.jl | 21 +++++++++++++++++++++ 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/src/nest_utils.jl b/src/nest_utils.jl index e034708b3..6dd602e02 100644 --- a/src/nest_utils.jl +++ b/src/nest_utils.jl @@ -175,9 +175,9 @@ function nest_if_over_margin!( return false end -function find_all_segment_splits(n::Int, k::Int, max_margin::Int) +function find_all_segment_splits(dp::Matrix{Int}, k::Int, max_margin::Int) res = Vector{Int}[] - # n = size(dp, 1) + n = size(dp, 1) if n == k return [fill(1, k)] @@ -195,15 +195,19 @@ function find_all_segment_splits(n::Int, k::Int, max_margin::Int) return end - start_val = 1 - max_val = n - k + 1 - - for i in start_val:min(n, max_val) + for i in 1:(n-k+1) + if current_sum + i > n + break + end _backtrack([t; i], current_sum + i) end end for i in 1:(n-k+1) + cm = dp[1, i] + if cm > max_margin + break + end _backtrack([i], i) end @@ -284,7 +288,7 @@ function find_optimal_nest_placeholders( end end - @info "" dp placeholder_inds + # @info "" dp placeholder_inds N = size(dp, 1) @@ -306,7 +310,7 @@ function find_optimal_nest_placeholders( ranges end - all_splits = find_all_segment_splits(N, s, max_margin) + all_splits = find_all_segment_splits(dp, s, max_margin) best_split = UnitRange{Int}[] min_diff = 1_000_000 # big number! 
diff --git a/test/issues.jl b/test/issues.jl index e82832247..a5eb6872f 100644 --- a/test/issues.jl +++ b/test/issues.jl @@ -1793,4 +1793,25 @@ """ @test format_text(s, SciMLStyle()) == s end + + @testset "817" begin + s = raw""" + a = ["Unknown" => SubRegion.Unknown, "Northern Europe" => SubRegion.Northern_Europe, "Southern Asia" => SubRegion.Southern_Asia, "Western Europe" => SubRegion.Western_Europe, "Sub-Saharan Africa" => SubRegion.Sub_Saharan_Africa, "Western Asia" => SubRegion.Western_Asia, "Eastern Asia" => SubRegion.Eastern_Asia, "Northern America" => SubRegion.Northern_America, "South-eastern Asia" => SubRegion.South_eastern_Asia, "Australia and New Zealand" => SubRegion.Australia_and_New_Zealand, "Eastern Europe" => SubRegion.Eastern_Europe, "Latin America and the Caribbean" => SubRegion.Latin_America_and_the_Caribbean, "Southern Europe" => SubRegion.Southern_Europe, "Central Asia" => SubRegion.Central_Asia] + """ + s2 = raw""" + a = ["Unknown" => SubRegion.Unknown, "Northern Europe" => SubRegion.Northern_Europe, + "Southern Asia" => SubRegion.Southern_Asia, + "Western Europe" => SubRegion.Western_Europe, + "Sub-Saharan Africa" => SubRegion.Sub_Saharan_Africa, + "Western Asia" => SubRegion.Western_Asia, "Eastern Asia" => SubRegion.Eastern_Asia, + "Northern America" => SubRegion.Northern_America, + "South-eastern Asia" => SubRegion.South_eastern_Asia, + "Australia and New Zealand" => SubRegion.Australia_and_New_Zealand, + "Eastern Europe" => SubRegion.Eastern_Europe, + "Latin America and the Caribbean" => SubRegion.Latin_America_and_the_Caribbean, + "Southern Europe" => SubRegion.Southern_Europe, + "Central Asia" => SubRegion.Central_Asia] + """ + @test format_text(s, SciMLStyle()) == s2 + end end From 328c13b43f6ac6682d0ad6ece0c07b5630914a33 Mon Sep 17 00:00:00 2001 From: Dominique Luna Date: Sat, 2 Mar 2024 17:30:16 -0500 Subject: [PATCH 7/9] further pruning --- src/nest_utils.jl | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff 
--git a/src/nest_utils.jl b/src/nest_utils.jl index 6dd602e02..50bf804c8 100644 --- a/src/nest_utils.jl +++ b/src/nest_utils.jl @@ -180,9 +180,9 @@ function find_all_segment_splits(dp::Matrix{Int}, k::Int, max_margin::Int) n = size(dp, 1) if n == k - return [fill(1, k)] + return Int[fill(1, k)] elseif k == 1 - return [[n]] + return Int[[n]] end function _backtrack(t::Vector{Int}, current_sum::Int) @@ -199,6 +199,9 @@ function find_all_segment_splits(dp::Matrix{Int}, k::Int, max_margin::Int) if current_sum + i > n break end + if dp[current_sum+1, current_sum+i] > max_margin + break + end _backtrack([t; i], current_sum + i) end end @@ -211,6 +214,10 @@ function find_all_segment_splits(dp::Matrix{Int}, k::Int, max_margin::Int) _backtrack([i], i) end + if length(res) == 0 + return [[n]] + end + return res end From 8802c85f627886019bb2a754c05d984c0e0b8b43 Mon Sep 17 00:00:00 2001 From: Dominique Luna Date: Sat, 2 Mar 2024 17:45:33 -0500 Subject: [PATCH 8/9] 30 --- src/nest_utils.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/nest_utils.jl b/src/nest_utils.jl index 50bf804c8..18a44a8b5 100644 --- a/src/nest_utils.jl +++ b/src/nest_utils.jl @@ -175,6 +175,7 @@ function nest_if_over_margin!( return false end +# TOOD: further improve the runtime of this function function find_all_segment_splits(dp::Matrix{Int}, k::Int, max_margin::Int) res = Vector{Int}[] n = size(dp, 1) @@ -230,7 +231,7 @@ function find_optimal_nest_placeholders( max_margin::Int, )::Vector{Int} placeholder_inds = findall(n -> n.typ === PLACEHOLDER, fst.nodes) - if length(placeholder_inds) <= 1 || length(placeholder_inds) >= 40 + if length(placeholder_inds) <= 1 || length(placeholder_inds) >= 30 return placeholder_inds end newline_inds = findall(n -> n.typ === NEWLINE, fst.nodes) From 51dba4456c9a868562556ae17db1f319f12b23c1 Mon Sep 17 00:00:00 2001 From: Dominique Luna Date: Sat, 2 Mar 2024 17:45:45 -0500 Subject: [PATCH 9/9] rm comment --- src/nest_utils.jl | 2 -- 1 file 
changed, 2 deletions(-) diff --git a/src/nest_utils.jl b/src/nest_utils.jl index 18a44a8b5..fd07feb82 100644 --- a/src/nest_utils.jl +++ b/src/nest_utils.jl @@ -250,8 +250,6 @@ function find_optimal_nest_placeholders( end push!(placeholder_groups, current_group) - # @info "groups" placeholder_groups - optimal_placeholders = Int[] for (i, g) in enumerate(placeholder_groups) optinds = find_optimal_nest_placeholders(