From 6bd5d3aacfa319939854977342b4b32026f6275a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Glen=20M=C3=A9vel?= Date: Fri, 26 Jul 2024 22:00:19 +0200 Subject: [PATCH 01/14] doc/CCHeap: reorder conversion functions --- src/core/CCHeap.ml | 116 ++++++++++++++++++++++---------------------- src/core/CCHeap.mli | 40 +++++++-------- 2 files changed, 78 insertions(+), 78 deletions(-) diff --git a/src/core/CCHeap.ml b/src/core/CCHeap.ml index dbd048245..5aefb2f87 100644 --- a/src/core/CCHeap.ml +++ b/src/core/CCHeap.ml @@ -89,21 +89,11 @@ module type S = sig (** {2 Conversions} *) - val to_list : t -> elt list - (** Return the elements of the heap, in no particular order. *) - - val to_list_sorted : t -> elt list - (** Return the elements in increasing order. - @since 1.1 *) - val add_list : t -> elt list -> t (** Add the elements of the list to the heap. An element occurring several times will be added that many times to the heap. @since 0.16 *) - val of_list : elt list -> t - (** [of_list l] is [add_list empty l]. Complexity: [O(n log n)]. *) - val add_iter : t -> elt iter -> t (** Like {!add_list}. @since 2.8 *) @@ -112,6 +102,12 @@ module type S = sig (** Like {!add_list}. @since 2.8 *) + val add_gen : t -> elt gen -> t + (** @since 0.16 *) + + val of_list : elt list -> t + (** [of_list l] is [add_list empty l]. Complexity: [O(n log n)]. *) + val of_iter : elt iter -> t (** Build a heap from a given [iter]. Complexity: [O(n log n)]. @since 2.8 *) @@ -120,6 +116,12 @@ module type S = sig (** Build a heap from a given [Seq.t]. Complexity: [O(n log n)]. @since 2.8 *) + val of_gen : elt gen -> t + (** Build a heap from a given [gen]. Complexity: [O(n log n)]. *) + + val to_list : t -> elt list + (** Return the elements of the heap, in no particular order. *) + val to_iter : t -> elt iter (** Return a [iter] of the elements of the heap. @since 2.8 *) @@ -128,6 +130,13 @@ module type S = sig (** Return a [Seq.t] of the elements of the heap. 
@since 2.8 *) + val to_gen : t -> elt gen + (** Return a [gen] of the elements of the heap. *) + + val to_list_sorted : t -> elt list + (** Return the elements in increasing order. + @since 1.1 *) + val to_iter_sorted : t -> elt iter (** Iterate on the elements, in increasing order. @since 2.8 *) @@ -136,15 +145,6 @@ module type S = sig (** Iterate on the elements, in increasing order. @since 2.8 *) - val add_gen : t -> elt gen -> t - (** @since 0.16 *) - - val of_gen : elt gen -> t - (** Build a heap from a given [gen]. Complexity: [O(n log n)]. *) - - val to_gen : t -> elt gen - (** Return a [gen] of the elements of the heap. *) - val to_tree : t -> elt ktree (** Return a [ktree] of the elements of the heap. *) @@ -283,24 +283,7 @@ module Make (E : PARTIAL_ORD) : S with type elt = E.t = struct (** {2 Conversions} *) - let to_list h = - let rec aux acc h = - match h with - | E -> acc - | N (_, x, l, r) -> x :: aux (aux acc l) r - in - aux [] h - - let to_list_sorted heap = - let rec recurse acc h = - match take h with - | None -> List.rev acc - | Some (h', x) -> recurse (x :: acc) h' - in - recurse [] heap - let add_list h l = List.fold_left add h l - let of_list l = add_list empty l let add_iter h i = let h = ref h in @@ -312,8 +295,24 @@ module Make (E : PARTIAL_ORD) : S with type elt = E.t = struct Seq.iter (fun x -> h := insert x !h) seq; !h + let rec add_gen h g = + match g () with + | None -> h + | Some x -> add_gen (add h x) g + + let of_list l = add_list empty l let of_iter i = add_iter empty i let of_seq seq = add_seq empty seq + let of_gen g = add_gen empty g + + let to_list h = + let rec aux acc h = + match h with + | E -> acc + | N (_, x, l, r) -> x :: aux (aux acc l) r + in + aux [] h + let to_iter h k = iter k h let to_seq h = @@ -326,28 +325,6 @@ module Make (E : PARTIAL_ORD) : S with type elt = E.t = struct in aux [ h ] - let to_iter_sorted heap = - let rec recurse h k = - match take h with - | None -> () - | Some (h', x) -> - k x; - recurse h' k - 
in - fun k -> recurse heap k - - let rec to_seq_sorted h () = - match take h with - | None -> Seq.Nil - | Some (h', x) -> Seq.Cons (x, to_seq_sorted h') - - let rec add_gen h g = - match g () with - | None -> h - | Some x -> add_gen (add h x) g - - let of_gen g = add_gen empty g - let to_gen h = let stack = Stack.create () in Stack.push h stack; @@ -365,6 +342,29 @@ module Make (E : PARTIAL_ORD) : S with type elt = E.t = struct in next + let to_list_sorted heap = + let rec recurse acc h = + match take h with + | None -> List.rev acc + | Some (h', x) -> recurse (x :: acc) h' + in + recurse [] heap + + let to_iter_sorted heap = + let rec recurse h k = + match take h with + | None -> () + | Some (h', x) -> + k x; + recurse h' k + in + fun k -> recurse heap k + + let rec to_seq_sorted h () = + match take h with + | None -> Seq.Nil + | Some (h', x) -> Seq.Cons (x, to_seq_sorted h') + let rec to_tree h () = match h with | E -> `Nil diff --git a/src/core/CCHeap.mli b/src/core/CCHeap.mli index 894a7ad20..fdc4269ff 100644 --- a/src/core/CCHeap.mli +++ b/src/core/CCHeap.mli @@ -93,21 +93,11 @@ module type S = sig (** {2 Conversions} *) - val to_list : t -> elt list - (** [to_list h] returns the elements of the heap [h], in no particular order. *) - - val to_list_sorted : t -> elt list - (** [to_list_sorted h] returns the elements of the heap [h] in increasing order. - @since 1.1 *) - val add_list : t -> elt list -> t (** [add_list h l] adds the elements of the list [l] into the heap [h]. An element occurring several times will be added that many times to the heap. @since 0.16 *) - val of_list : elt list -> t - (** [of_list l] is [add_list empty l]. Complexity: [O(n log n)]. *) - val add_iter : t -> elt iter -> t (** [add_iter h iter] is like {!add_list}. @since 2.8 *) @@ -117,6 +107,13 @@ module type S = sig Renamed from [add_std_seq] since 3.0. @since 3.0 *) + val add_gen : t -> elt gen -> t + (** [add_gen h gen] adds the gen [gen] to the heap [h]. 
+ @since 0.16 *) + + val of_list : elt list -> t + (** [of_list l] is [add_list empty l]. Complexity: [O(n log n)]. *) + val of_iter : elt iter -> t (** [of_iter iter] builds a heap from a given [iter]. Complexity: [O(n log n)]. @since 2.8 *) @@ -126,6 +123,12 @@ module type S = sig Renamed from [of_seq] since 3.0. @since 3.0 *) + val of_gen : elt gen -> t + (** [of_gen gen] builds a heap from a given [gen]. Complexity: [O(n log n)]. *) + + val to_list : t -> elt list + (** [to_list h] returns the elements of the heap [h], in no particular order. *) + val to_iter : t -> elt iter (** [to_iter h] returns a [iter] of the elements of the heap [h]. @since 2.8 *) @@ -135,6 +138,13 @@ module type S = sig Renamed from [to_std_seq] since 3.0. @since 3.0 *) + val to_gen : t -> elt gen + (** [to_gen h] returns a [gen] of the elements of the heap [h]. *) + + val to_list_sorted : t -> elt list + (** [to_list_sorted h] returns the elements of the heap [h] in increasing order. + @since 1.1 *) + val to_iter_sorted : t -> elt iter (** [to_iter_sorted h] returns a [iter] by iterating on the elements of [h], in increasing order. @@ -146,16 +156,6 @@ module type S = sig Renamed from [to_std_seq_sorted] since 3.0. @since 3.0 *) - val add_gen : t -> elt gen -> t - (** [add_gen h gen] adds the gen [gen] to the heap [h]. - @since 0.16 *) - - val of_gen : elt gen -> t - (** [of_gen gen] builds a heap from a given [gen]. Complexity: [O(n log n)]. *) - - val to_gen : t -> elt gen - (** [to_gen h] returns a [gen] of the elements of the heap [h]. *) - val to_tree : t -> elt ktree (** [to_tree h] returns a [ktree] of the elements of the heap [h]. 
*) From 8666faf25793fa16f0e04d27ec54ed76f2817c93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Glen=20M=C3=A9vel?= Date: Fri, 26 Jul 2024 22:33:33 +0200 Subject: [PATCH 02/14] doc/CCHeap: uniformize doc of conversion functions --- src/core/CCHeap.ml | 74 +++++++++++++++++++++++++++++++-------------- src/core/CCHeap.mli | 53 +++++++++++++++++++++----------- 2 files changed, 86 insertions(+), 41 deletions(-) diff --git a/src/core/CCHeap.ml b/src/core/CCHeap.ml index 5aefb2f87..bd2f4c03c 100644 --- a/src/core/CCHeap.ml +++ b/src/core/CCHeap.ml @@ -87,66 +87,90 @@ module type S = sig val size : t -> int (** Number of elements (linear complexity). *) - (** {2 Conversions} *) + (** {2 Adding many elements at once} *) val add_list : t -> elt list -> t - (** Add the elements of the list to the heap. An element occurring several - times will be added that many times to the heap. + (** [add_list h l] adds the elements of the list [l] into the heap [h]. + An element occurring several times will be added that many times to the heap. @since 0.16 *) val add_iter : t -> elt iter -> t - (** Like {!add_list}. + (** [add_iter h iter] is akin to {!add_list}, + but taking an [iter] of elements as input. @since 2.8 *) val add_seq : t -> elt Seq.t -> t - (** Like {!add_list}. - @since 2.8 *) + (** [add_seq h seq] is akin to {!add_list}, + but taking a [Seq.t] of elements as input. + Renamed from [add_std_seq] since 3.0. + @since 3.0 *) val add_gen : t -> elt gen -> t - (** @since 0.16 *) + (** [add_gen h gen] is akin to {!add_list}, + but taking a [gen] of elements as input. + @since 0.16 *) + + (** {2 Conversions} *) val of_list : elt list -> t - (** [of_list l] is [add_list empty l]. Complexity: [O(n log n)]. *) + (** [of_list l] builds a heap from a given list of elements. + It is equivalent to [add_list empty l]. + Complexity: [O(n log n)]. + *) val of_iter : elt iter -> t - (** Build a heap from a given [iter]. Complexity: [O(n log n)]. 
+ (** [of_iter iter] is akin to {!of_list}, + but taking an [iter] of elements as input. @since 2.8 *) val of_seq : elt Seq.t -> t - (** Build a heap from a given [Seq.t]. Complexity: [O(n log n)]. - @since 2.8 *) + (** [of_seq seq] is akin to {!of_list}, + but taking a [Seq.t] of elements as input. + Renamed from [of_seq] since 3.0. + @since 3.0 *) val of_gen : elt gen -> t - (** Build a heap from a given [gen]. Complexity: [O(n log n)]. *) + (** [of_gen gen] is akin to {!of_list}, + but taking a [gen] of elements as input. *) val to_list : t -> elt list - (** Return the elements of the heap, in no particular order. *) + (** [to_list h] returns a list of the elements of the heap [h], + in no particular order. + *) val to_iter : t -> elt iter - (** Return a [iter] of the elements of the heap. + (** [to_iter h] is akin to {!to_list}, but returning an [iter] of elements. @since 2.8 *) val to_seq : t -> elt Seq.t - (** Return a [Seq.t] of the elements of the heap. - @since 2.8 *) + (** [to_seq h] is akin to {!to_list}, but returning a [Seq.t] of elements + Renamed from [to_std_seq] since 3.0. + @since 3.0 *) val to_gen : t -> elt gen - (** Return a [gen] of the elements of the heap. *) + (** [to_gen h] is akin to {!to_list}, but returning a [gen] of elements. *) val to_list_sorted : t -> elt list - (** Return the elements in increasing order. + (** [to_list_sorted h] returns the list of elements of the heap [h] + in increasing order. @since 1.1 *) val to_iter_sorted : t -> elt iter - (** Iterate on the elements, in increasing order. + (** [to_iter_sorted h] is akin to {!to_list_sorted}, + but returning an [iter] of elements. @since 2.8 *) val to_seq_sorted : t -> elt Seq.t - (** Iterate on the elements, in increasing order. - @since 2.8 *) + (** [to_seq_sorted h] is akin to {!to_list_sorted}, + but returning a [Seq.t] of elements. + Renamed from [to_std_seq_sorted] since 3.0. 
+ @since 3.0 *) val to_tree : t -> elt ktree - (** Return a [ktree] of the elements of the heap. *) + (** [to_tree h] returns a [ktree] of the elements of the heap [h]. + The layout is not specified. *) + + (** {2 Pretty-printing} *) val to_string : ?sep:string -> (elt -> string) -> t -> string (** Print the heap in a string @@ -281,7 +305,7 @@ module Make (E : PARTIAL_ORD) : S with type elt = E.t = struct | E -> 0 | N (_, _, l, r) -> 1 + size l + size r - (** {2 Conversions} *) + (** {2 Adding many elements at once} *) let add_list h l = List.fold_left add h l @@ -300,6 +324,8 @@ module Make (E : PARTIAL_ORD) : S with type elt = E.t = struct | None -> h | Some x -> add_gen (add h x) g + (** {2 Conversions} *) + let of_list l = add_list empty l let of_iter i = add_iter empty i let of_seq seq = add_seq empty seq @@ -370,6 +396,8 @@ module Make (E : PARTIAL_ORD) : S with type elt = E.t = struct | E -> `Nil | N (_, x, l, r) -> `Node (x, [ to_tree l; to_tree r ]) + (** {2 Pretty-printing} *) + let to_string ?(sep = ",") elt_to_string h = to_list_sorted h |> List.map elt_to_string |> String.concat sep diff --git a/src/core/CCHeap.mli b/src/core/CCHeap.mli index fdc4269ff..14b2917fe 100644 --- a/src/core/CCHeap.mli +++ b/src/core/CCHeap.mli @@ -91,7 +91,7 @@ module type S = sig val size : t -> int (** [size h] is the number of elements in the heap [h]. Linear complexity. *) - (** {2 Conversions} *) + (** {2 Adding many elements at once} *) val add_list : t -> elt list -> t (** [add_list h l] adds the elements of the list [l] into the heap [h]. @@ -99,65 +99,82 @@ module type S = sig @since 0.16 *) val add_iter : t -> elt iter -> t - (** [add_iter h iter] is like {!add_list}. + (** [add_iter h iter] is akin to {!add_list}, + but taking an [iter] of elements as input. @since 2.8 *) val add_seq : t -> elt Seq.t -> t - (** [add_seq h seq] is like {!add_list}. + (** [add_seq h seq] is akin to {!add_list}, + but taking a [Seq.t] of elements as input. 
Renamed from [add_std_seq] since 3.0. @since 3.0 *) val add_gen : t -> elt gen -> t - (** [add_gen h gen] adds the gen [gen] to the heap [h]. + (** [add_gen h gen] is akin to {!add_list}, + but taking a [gen] of elements as input. @since 0.16 *) + (** {2 Conversions} *) + val of_list : elt list -> t - (** [of_list l] is [add_list empty l]. Complexity: [O(n log n)]. *) + (** [of_list l] builds a heap from a given list of elements. + It is equivalent to [add_list empty l]. + Complexity: [O(n log n)]. + *) val of_iter : elt iter -> t - (** [of_iter iter] builds a heap from a given [iter]. Complexity: [O(n log n)]. + (** [of_iter iter] is akin to {!of_list}, + but taking an [iter] of elements as input. @since 2.8 *) val of_seq : elt Seq.t -> t - (** [of_seq seq] builds a heap from a given [Seq.t]. Complexity: [O(n log n)]. + (** [of_seq seq] is akin to {!of_list}, + but taking a [Seq.t] of elements as input. Renamed from [of_seq] since 3.0. @since 3.0 *) val of_gen : elt gen -> t - (** [of_gen gen] builds a heap from a given [gen]. Complexity: [O(n log n)]. *) + (** [of_gen gen] is akin to {!of_list}, + but taking a [gen] of elements as input. *) val to_list : t -> elt list - (** [to_list h] returns the elements of the heap [h], in no particular order. *) + (** [to_list h] returns a list of the elements of the heap [h], + in no particular order. + *) val to_iter : t -> elt iter - (** [to_iter h] returns a [iter] of the elements of the heap [h]. + (** [to_iter h] is akin to {!to_list}, but returning an [iter] of elements. @since 2.8 *) val to_seq : t -> elt Seq.t - (** [to_seq h] returns a [Seq.t] of the elements of the heap [h]. + (** [to_seq h] is akin to {!to_list}, but returning a [Seq.t] of elements Renamed from [to_std_seq] since 3.0. @since 3.0 *) val to_gen : t -> elt gen - (** [to_gen h] returns a [gen] of the elements of the heap [h]. *) + (** [to_gen h] is akin to {!to_list}, but returning a [gen] of elements. 
*) val to_list_sorted : t -> elt list - (** [to_list_sorted h] returns the elements of the heap [h] in increasing order. + (** [to_list_sorted h] returns the list of elements of the heap [h] + in increasing order. @since 1.1 *) val to_iter_sorted : t -> elt iter - (** [to_iter_sorted h] returns a [iter] by iterating on the elements of [h], - in increasing order. + (** [to_iter_sorted h] is akin to {!to_list_sorted}, + but returning an [iter] of elements. @since 2.8 *) val to_seq_sorted : t -> elt Seq.t - (** [to_seq_sorted h] returns a [Seq.t] by iterating on the elements of [h], - in increasing order. + (** [to_seq_sorted h] is akin to {!to_list_sorted}, + but returning a [Seq.t] of elements. Renamed from [to_std_seq_sorted] since 3.0. @since 3.0 *) val to_tree : t -> elt ktree - (** [to_tree h] returns a [ktree] of the elements of the heap [h]. *) + (** [to_tree h] returns a [ktree] of the elements of the heap [h]. + The layout is not specified. *) + + (** {2 Pretty-printing} *) val to_string : ?sep:string -> (elt -> string) -> t -> string (** [to_string ?sep f h] prints the heap [h] in a string From 793bad1e5bb7f2b6d3b7faac8d95d932ad5c4c61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Glen=20M=C3=A9vel?= Date: Fri, 26 Jul 2024 23:24:20 +0200 Subject: [PATCH 03/14] doc/CCHeap: document complexities MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Committing to these complexities in documentation is not a constraint for representation of heaps, because they are achieved by every well-known representation (for some of them, in amortized time): https://en.wikipedia.org/wiki/Template:Heap_Running_Times - `find_min`: O(1) - `take`: O(log n) - `insert`: O(log n) - `merge`: O(log(m+n)) (except for binary heaps, which only achieve O(m+n)) - `add_seq`: O(n log(m+n)) (trivially, by repeated insertion) + this can be improved to O(log(m) + n), regardless of the representation of heaps (to be done in a later commit) - `of_seq`: O(n log n) 
(ditto: can be improved to O(n)) Less trivial: - `filter`, `delete_{one,all}`: + O(n) can be achieved for any reasonable representation of heaps, by using `of_seq` and `to_seq` which, as said, can always be made O(n). + With the current implementation, it is not obvious, but the complexity of `filter` and `delete_all` is Θ(n log n); the complexity of `delete_one` is O(n). Indeed, node rebuilding with `_make_node` is in O(1), merging is in Θ(log n), and every element deletion induces one merge; there are heap instances that achieve the worst case Ω(n log n), for instance: x / \ x y / \ ... y / x / \ h y with n/3 occurrences of x, n/3 occurrences of y, a sub-heap h of n/3 elements, and when y is greater than all elements of h; then, deleting all occurrences of x performs the following computation: merge (merge (merge (merge h y) …) y) y where each `merge` takes time Θ(log n). --- src/core/CCHeap.ml | 47 +++++++++++++++++++++++++++++++-------------- src/core/CCHeap.mli | 35 +++++++++++++++++++++++++-------- 2 files changed, 60 insertions(+), 22 deletions(-) diff --git a/src/core/CCHeap.ml b/src/core/CCHeap.ml index bd2f4c03c..5ece9364b 100644 --- a/src/core/CCHeap.ml +++ b/src/core/CCHeap.ml @@ -37,37 +37,47 @@ module type S = sig exception Empty val merge : t -> t -> t - (** Merge two heaps. *) + (** [merge h1 h2] merges the two heaps [h1] and [h2]. + Complexity: [O(log (m+n))] where [m] and [n] are the number of elements in each heap. + *) val insert : elt -> t -> t - (** Insert a value in the heap. *) + (** [insert x h] inserts an element [x] into the heap [h]. + Complexity: [O(log n)] where [n] is the number of elements in [h]. + *) val add : t -> elt -> t - (** Synonym to {!insert}. *) + (** [add h x] is [insert x h]. *) val filter : (elt -> bool) -> t -> t - (** Filter values, only retaining the ones that satisfy the predicate. - Linear time at least. *) + (** [filter p h] filters values, only retaining the ones that satisfy the predicate [p]. 
+ Complexity: [O(n log n)]. + *) val find_min : t -> elt option - (** Find minimal element. *) + (** [find_min h] find the minimal element of the heap [h]. + Complexity: [O(1)]. + *) val find_min_exn : t -> elt - (** Like {!find_min} but can fail. + (** [find_min_exn h] is like {!find_min} but can fail. @raise Empty if the heap is empty. *) val take : t -> (t * elt) option - (** Extract and return the minimum element, and the new heap (without - this element), or [None] if the heap is empty. *) + (** [take h] extracts and returns the minimum element, and the new heap (without + this element), or [None] if the heap [h] is empty. + Complexity: [O(log n)]. + *) val take_exn : t -> t * elt - (** Like {!take}, but can fail. + (** [take_exn h] is like {!take}, but can fail. @raise Empty if the heap is empty. *) val delete_one : (elt -> elt -> bool) -> elt -> t -> t (** Delete one occurrence of a value if it exist in the heap. [delete_one eq x h], use [eq] to find one [x] in [h] and delete it. If [h] do not contain [x] then it return [h]. + Complexity: [O(n)]. @since 2.0 *) val delete_all : (elt -> elt -> bool) -> elt -> t -> t @@ -76,22 +86,27 @@ module type S = sig If [h] do not contain [x] then it return [h]. The difference with {!filter} is that [delete_all] stops as soon as it enters a subtree whose root is bigger than the element. + Complexity: [O(n log n)]. @since 2.0 *) val iter : (elt -> unit) -> t -> unit - (** Iterate on elements. *) + (** [iter f h] iterates over the heap [h] invoking [f] with the current element. *) val fold : ('a -> elt -> 'a) -> 'a -> t -> 'a - (** Fold on all values. *) + (** [fold f acc h] folds on all values of [h]. *) val size : t -> int - (** Number of elements (linear complexity). *) + (** [size h] is the number of elements in the heap [h]. + Complexity: [O(n)]. + *) (** {2 Adding many elements at once} *) val add_list : t -> elt list -> t (** [add_list h l] adds the elements of the list [l] into the heap [h]. 
An element occurring several times will be added that many times to the heap. + Complexity: [O(n log (m+n))] + where [m] and [n] are the number of elements in [h] and [l], respectively. @since 0.16 *) val add_iter : t -> elt iter -> t @@ -136,6 +151,7 @@ module type S = sig val to_list : t -> elt list (** [to_list h] returns a list of the elements of the heap [h], in no particular order. + Complexity: [O(n)]. *) val to_iter : t -> elt iter @@ -153,6 +169,7 @@ module type S = sig val to_list_sorted : t -> elt list (** [to_list_sorted h] returns the list of elements of the heap [h] in increasing order. + Complexity: [O(n log n)]. @since 1.1 *) val to_iter_sorted : t -> elt iter @@ -168,7 +185,9 @@ module type S = sig val to_tree : t -> elt ktree (** [to_tree h] returns a [ktree] of the elements of the heap [h]. - The layout is not specified. *) + The layout is not specified. + Complexity: [O(n)]. + *) (** {2 Pretty-printing} *) diff --git a/src/core/CCHeap.mli b/src/core/CCHeap.mli index 14b2917fe..214c99722 100644 --- a/src/core/CCHeap.mli +++ b/src/core/CCHeap.mli @@ -42,20 +42,27 @@ module type S = sig exception Empty val merge : t -> t -> t - (** [merge h1 h2] merges the two heaps [h1] and [h2]. *) + (** [merge h1 h2] merges the two heaps [h1] and [h2]. + Complexity: [O(log (m+n))] where [m] and [n] are the number of elements in each heap. + *) val insert : elt -> t -> t - (** [insert x h] inserts an element [x] into the heap [h]. *) + (** [insert x h] inserts an element [x] into the heap [h]. + Complexity: [O(log n)] where [n] is the number of elements in [h]. + *) val add : t -> elt -> t - (** [add h x] inserts an element [x] into the heap [h]. *) + (** [add h x] is [insert x h]. *) val filter : (elt -> bool) -> t -> t (** [filter p h] filters values, only retaining the ones that satisfy the predicate [p]. - Linear time at least. *) + Complexity: [O(n log n)]. + *) val find_min : t -> elt option - (** [find_min h] find the minimal element of the heap [h]. 
*) + (** [find_min h] find the minimal element of the heap [h]. + Complexity: [O(1)]. + *) val find_min_exn : t -> elt (** [find_min_exn h] is like {!find_min} but can fail. @@ -63,7 +70,9 @@ module type S = sig val take : t -> (t * elt) option (** [take h] extracts and returns the minimum element, and the new heap (without - this element), or [None] if the heap [h] is empty. *) + this element), or [None] if the heap [h] is empty. + Complexity: [O(log n)]. + *) val take_exn : t -> t * elt (** [take_exn h] is like {!take}, but can fail. @@ -73,6 +82,7 @@ module type S = sig (** [delete_one eq x h] uses [eq] to find one occurrence of a value [x] if it exist in the heap [h], and delete it. If [h] do not contain [x] then it return [h]. + Complexity: [O(n)]. @since 2.0 *) val delete_all : (elt -> elt -> bool) -> elt -> t -> t @@ -80,6 +90,7 @@ module type S = sig If [h] do not contain [x] then it return [h]. The difference with {!filter} is that [delete_all] stops as soon as it enters a subtree whose root is bigger than the element. + Complexity: [O(n log n)]. @since 2.0 *) val iter : (elt -> unit) -> t -> unit @@ -89,13 +100,17 @@ module type S = sig (** [fold f acc h] folds on all values of [h]. *) val size : t -> int - (** [size h] is the number of elements in the heap [h]. Linear complexity. *) + (** [size h] is the number of elements in the heap [h]. + Complexity: [O(n)]. + *) (** {2 Adding many elements at once} *) val add_list : t -> elt list -> t (** [add_list h l] adds the elements of the list [l] into the heap [h]. An element occurring several times will be added that many times to the heap. + Complexity: [O(n log (m+n))] + where [m] and [n] are the number of elements in [h] and [l], respectively. @since 0.16 *) val add_iter : t -> elt iter -> t @@ -140,6 +155,7 @@ module type S = sig val to_list : t -> elt list (** [to_list h] returns a list of the elements of the heap [h], in no particular order. + Complexity: [O(n)]. 
*) val to_iter : t -> elt iter @@ -157,6 +173,7 @@ module type S = sig val to_list_sorted : t -> elt list (** [to_list_sorted h] returns the list of elements of the heap [h] in increasing order. + Complexity: [O(n log n)]. @since 1.1 *) val to_iter_sorted : t -> elt iter @@ -172,7 +189,9 @@ module type S = sig val to_tree : t -> elt ktree (** [to_tree h] returns a [ktree] of the elements of the heap [h]. - The layout is not specified. *) + The layout is not specified. + Complexity: [O(n)]. + *) (** {2 Pretty-printing} *) From 8349a4d244b5077f2f097188b21a62b897097399 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Glen=20M=C3=A9vel?= Date: Fri, 26 Jul 2024 23:29:40 +0200 Subject: [PATCH 04/14] doc/CCHeap: fix grammar, improve doc of delete_{one,all} --- src/core/CCHeap.ml | 18 ++++++++++-------- src/core/CCHeap.mli | 34 ++++++++++++++++++---------------- 2 files changed, 28 insertions(+), 24 deletions(-) diff --git a/src/core/CCHeap.ml b/src/core/CCHeap.ml index 5ece9364b..bab1fcda4 100644 --- a/src/core/CCHeap.ml +++ b/src/core/CCHeap.ml @@ -74,18 +74,20 @@ module type S = sig @raise Empty if the heap is empty. *) val delete_one : (elt -> elt -> bool) -> elt -> t -> t - (** Delete one occurrence of a value if it exist in the heap. - [delete_one eq x h], use [eq] to find one [x] in [h] and delete it. - If [h] do not contain [x] then it return [h]. + (** [delete_one eq x h] deletes an occurrence of the value [x] from the heap + [h], + if there is some. + If [h] does not contain [x], then [h] itself is returned. + Elements are identified by the equality function [eq]. Complexity: [O(n)]. @since 2.0 *) val delete_all : (elt -> elt -> bool) -> elt -> t -> t - (** Delete all occurrences of a value in the heap. - [delete_all eq x h], use [eq] to find all [x] in [h] and delete them. - If [h] do not contain [x] then it return [h]. - The difference with {!filter} is that [delete_all] stops as soon as - it enters a subtree whose root is bigger than the element. 
+ (** [delete_all eq x h] deletes all occurrences of the value [x] from the heap [h]. + If [h] does not contain [x], then [h] itself is returned. + Elements are identified by the equality function [eq]. + By contrast with {!filter}, [delete_all] stops as soon as + it enters a subtree whose root is greater than [x]. Complexity: [O(n log n)]. @since 2.0 *) diff --git a/src/core/CCHeap.mli b/src/core/CCHeap.mli index 214c99722..4b7d858d9 100644 --- a/src/core/CCHeap.mli +++ b/src/core/CCHeap.mli @@ -79,17 +79,19 @@ module type S = sig @raise Empty if the heap is empty. *) val delete_one : (elt -> elt -> bool) -> elt -> t -> t - (** [delete_one eq x h] uses [eq] to find one occurrence of a value [x] - if it exist in the heap [h], and delete it. - If [h] do not contain [x] then it return [h]. + (** [delete_one eq x h] deletes an occurrence of the value [x] from the heap [h], + if there is some. + If [h] does not contain [x], then [h] itself is returned. + Elements are identified by the equality function [eq]. Complexity: [O(n)]. @since 2.0 *) val delete_all : (elt -> elt -> bool) -> elt -> t -> t - (** [delete_all eq x h] uses [eq] to find all [x] in [h] and delete them. - If [h] do not contain [x] then it return [h]. - The difference with {!filter} is that [delete_all] stops as soon as - it enters a subtree whose root is bigger than the element. + (** [delete_all eq x h] deletes all occurrences of the value [x] from the heap [h]. + If [h] does not contain [x], then [h] itself is returned. + Elements are identified by the equality function [eq]. + By contrast with {!filter}, [delete_all] stops as soon as + it enters a subtree whose root is greater than [x]. Complexity: [O(n log n)]. 
@since 2.0 *) @@ -196,9 +198,9 @@ module type S = sig (** {2 Pretty-printing} *) val to_string : ?sep:string -> (elt -> string) -> t -> string - (** [to_string ?sep f h] prints the heap [h] in a string - using [sep] as a given separator (default ",") between each element - (converted to a string using [f]). + (** [to_string ?sep f h] prints the heap [h] to a string, + using [f] to convert elements to strings + and [sep] (default: [","]) as a separator between elements. @since 2.7 *) val pp : @@ -209,17 +211,17 @@ module type S = sig t printer (** [pp ?pp_start ?pp_stop ?pp_sep ppf h] prints [h] on [ppf]. Each element is formatted with [ppf], [pp_start] is called at the beginning, - [pp_stop] is called at the end, [pp_sep] is called between each elements. - By defaults [pp_start] and [pp_stop] does nothing and [pp_sep] defaults to - (fun out -> Format.fprintf out ",@ "). + [pp_stop] is called at the end, [pp_sep] is called between each element. + By default, [pp_start] and [pp_stop] do nothing, and [pp_sep] is + [(fun out -> Format.fprintf out ",@ ")]. Renamed from [print] since 2.0 @since 0.16 *) end module Make (E : PARTIAL_ORD) : S with type elt = E.t -(** A convenient version of [Make] that take a [TOTAL_ORD] instead of +(** A convenient version of [Make] that takes a [TOTAL_ORD] instead of a partially ordered module. - It allow to directly pass modules that implement [compare] - without implementing [leq] explicitly *) + It allows to directly pass modules that implement [compare] + without implementing [leq] explicitly. 
*) module Make_from_compare (E : TOTAL_ORD) : S with type elt = E.t From cc2dd6d829783412db63f1149d308907a787cd87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Glen=20M=C3=A9vel?= Date: Sat, 27 Jul 2024 01:47:09 +0200 Subject: [PATCH 05/14] doc/CCHeap: move filter down --- src/core/CCHeap.ml | 22 +++++++++++----------- src/core/CCHeap.mli | 10 +++++----- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/core/CCHeap.ml b/src/core/CCHeap.ml index bab1fcda4..b02244465 100644 --- a/src/core/CCHeap.ml +++ b/src/core/CCHeap.ml @@ -49,11 +49,6 @@ module type S = sig val add : t -> elt -> t (** [add h x] is [insert x h]. *) - val filter : (elt -> bool) -> t -> t - (** [filter p h] filters values, only retaining the ones that satisfy the predicate [p]. - Complexity: [O(n log n)]. - *) - val find_min : t -> elt option (** [find_min h] find the minimal element of the heap [h]. Complexity: [O(1)]. @@ -91,6 +86,11 @@ module type S = sig Complexity: [O(n log n)]. @since 2.0 *) + val filter : (elt -> bool) -> t -> t + (** [filter p h] filters values, only retaining the ones that satisfy the predicate [p]. + Complexity: [O(n log n)]. + *) + val iter : (elt -> unit) -> t -> unit (** [iter f h] iterates over the heap [h] invoking [f] with the current element. 
*) @@ -251,12 +251,6 @@ module Make (E : PARTIAL_ORD) : S with type elt = E.t = struct let insert x h = merge (N (1, x, E, E)) h let add h x = insert x h - let rec filter p h = - match h with - | E -> E - | N (_, x, l, r) when p x -> _make_node x (filter p l) (filter p r) - | N (_, _, l, r) -> merge (filter p l) (filter p r) - let find_min_exn = function | E -> raise Empty | N (_, x, _, _) -> x @@ -306,6 +300,12 @@ module Make (E : PARTIAL_ORD) : S with type elt = E.t = struct else h + let rec filter p h = + match h with + | E -> E + | N (_, x, l, r) when p x -> _make_node x (filter p l) (filter p r) + | N (_, _, l, r) -> merge (filter p l) (filter p r) + let rec iter f h = match h with | E -> () diff --git a/src/core/CCHeap.mli b/src/core/CCHeap.mli index 4b7d858d9..ec14bc230 100644 --- a/src/core/CCHeap.mli +++ b/src/core/CCHeap.mli @@ -54,11 +54,6 @@ module type S = sig val add : t -> elt -> t (** [add h x] is [insert x h]. *) - val filter : (elt -> bool) -> t -> t - (** [filter p h] filters values, only retaining the ones that satisfy the predicate [p]. - Complexity: [O(n log n)]. - *) - val find_min : t -> elt option (** [find_min h] find the minimal element of the heap [h]. Complexity: [O(1)]. @@ -95,6 +90,11 @@ module type S = sig Complexity: [O(n log n)]. @since 2.0 *) + val filter : (elt -> bool) -> t -> t + (** [filter p h] filters values, only retaining the ones that satisfy the predicate [p]. + Complexity: [O(n log n)]. + *) + val iter : (elt -> unit) -> t -> unit (** [iter f h] iterates over the heap [h] invoking [f] with the current element. 
*) From 806bb8c7bcd1f29c5c789dafe1d5650888f0ac12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Glen=20M=C3=A9vel?= Date: Sat, 27 Jul 2024 04:24:15 +0200 Subject: [PATCH 06/14] perf/CCHeap: heap building in O(n) --- CHANGELOG.md | 5 +++ README.md | 2 +- src/core/CCHeap.ml | 95 +++++++++++++++++++++++++++++++++------------ src/core/CCHeap.mli | 6 ++- 4 files changed, 80 insertions(+), 28 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e36a8e51b..6ad8afe7f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## main + +- perf: `CCHeap`: building a heap from n elements is now in time O(n) + instead of O(n log n) + ## 3.13.1 - list: TRMC was in 4.14, we can use it earlier diff --git a/README.md b/README.md index 072460c20..ea930024c 100644 --- a/README.md +++ b/README.md @@ -539,7 +539,7 @@ val h' : IntHeap.t = val x : int = 2 # IntHeap.to_list h' (* see, 2 is removed *);; -- : int list = [4; 6; 8; 10] +- : int list = [4; 8; 10; 6] ``` ### IO helpers diff --git a/src/core/CCHeap.ml b/src/core/CCHeap.ml index b02244465..7042c80de 100644 --- a/src/core/CCHeap.ml +++ b/src/core/CCHeap.ml @@ -7,6 +7,14 @@ type 'a gen = unit -> 'a option type 'a printer = Format.formatter -> 'a -> unit type 'a ktree = unit -> [ `Nil | `Node of 'a * 'a ktree list ] +let[@inline] _iter_map f xs k = xs (fun x -> k (f x)) + +let rec _gen_iter k g = + begin match g () with + | None -> () + | Some x -> k x; _gen_iter k g + end + module type PARTIAL_ORD = sig type t @@ -107,7 +115,8 @@ module type S = sig val add_list : t -> elt list -> t (** [add_list h l] adds the elements of the list [l] into the heap [h]. An element occurring several times will be added that many times to the heap. - Complexity: [O(n log (m+n))] + Elements need not be given in any particular order. + Complexity: [O(log m + n)] where [m] and [n] are the number of elements in [h] and [l], respectively. 
@since 0.16 *) @@ -132,7 +141,8 @@ module type S = sig val of_list : elt list -> t (** [of_list l] builds a heap from a given list of elements. It is equivalent to [add_list empty l]. - Complexity: [O(n log n)]. + Elements need not be given in any particular order. + Complexity: [O(n)]. *) val of_iter : elt iter -> t @@ -223,6 +233,8 @@ module Make (E : PARTIAL_ORD) : S with type elt = E.t = struct exception Empty + let singleton x = N (1, x, E, E) + (* Rank of the tree *) let _rank = function | E -> 0 @@ -248,7 +260,7 @@ module Make (E : PARTIAL_ORD) : S with type elt = E.t = struct else _make_node y a2 (merge t1 b2) - let insert x h = merge (N (1, x, E, E)) h + let insert x h = merge (singleton x) h let add h x = insert x h let find_min_exn = function @@ -326,31 +338,64 @@ module Make (E : PARTIAL_ORD) : S with type elt = E.t = struct | E -> 0 | N (_, _, l, r) -> 1 + size l + size r - (** {2 Adding many elements at once} *) - - let add_list h l = List.fold_left add h l - - let add_iter h i = - let h = ref h in - i (fun x -> h := insert x !h); - !h - - let add_seq h seq = - let h = ref h in - Seq.iter (fun x -> h := insert x !h) seq; - !h + (** {2 Conversions from sequences} *) + + (* Merge an [iter] of k heaps into one. + Instead of folding [merge] in one pass (which would run in time O(k log N) + where k is the number of heaps and N is the total number of elements), it + is more efficient to merge heaps pairwise until only one remains; see e.g. + Robert Tarjan, "Data Structures and Network Algorithms", + Chapter 3.3 "Leftist heaps", 1983. + or: + Chris Okasaki, "Purely Functional Data Structures", + Chapter 3.2 "Leftist heaps", Exercise 3.3, 1998 + This is independent of the representation of heaps, and, as long as merging + is in time O(log n), this runs in time O(k + k*log(N/k)). Notice that this + is a O(k + N) (if k is small wrt. N, this last upper bound is very loose). 
+ The code below uses additional space of only O(log(k)) at any moment; + it avoids storing an intermediate list of length O(k). *) + let _merge_heap_iter (hs : t iter) : t = + let rec cons_and_merge h0 hs weights = + begin match hs with + | h1 :: hs' when weights land 1 = 0 -> + cons_and_merge (merge h0 h1) hs' (weights lsr 1) + | _ -> + h0 :: hs + end + in + (* the i-th heap in this list is a merger of 2^{w_i} input heaps, each + having gone through w_i merge operations, where the "weights" 2^{w_i} are + strictly increasing wrt. i: *) + let mergers = ref [] in + (* The w_i are the 1-bits in the binary writing of [count], the number of + input heaps merged so far; adding a heap to the mergers works like binary + incrementation: *) + let count = ref 0 in + hs begin fun h -> + incr count ; + mergers := cons_and_merge h !mergers !count ; + end ; + List.fold_left merge E !mergers + + (* To build a heap with n given values, instead of repeated insertions, + it is more efficient to do pairwise merging, running in time O(n). 
*) + let of_iter xs = + xs + |> _iter_map singleton + |> _merge_heap_iter + + let of_list xs = of_iter (fun k -> List.iter k xs) + let of_seq xs = of_iter (fun k -> Seq.iter k xs) + let of_gen xs = of_iter (fun k -> _gen_iter k xs) - let rec add_gen h g = - match g () with - | None -> h - | Some x -> add_gen (add h x) g + (** {2 Adding many elements at once} *) - (** {2 Conversions} *) + let add_list h xs = merge h (of_list xs) + let add_iter h xs = merge h (of_iter xs) + let add_seq h xs = merge h (of_seq xs) + let add_gen h xs = merge h (of_gen xs) - let of_list l = add_list empty l - let of_iter i = add_iter empty i - let of_seq seq = add_seq empty seq - let of_gen g = add_gen empty g + (** {2 Conversions to sequences} *) let to_list h = let rec aux acc h = diff --git a/src/core/CCHeap.mli b/src/core/CCHeap.mli index ec14bc230..dab40bda2 100644 --- a/src/core/CCHeap.mli +++ b/src/core/CCHeap.mli @@ -111,7 +111,8 @@ module type S = sig val add_list : t -> elt list -> t (** [add_list h l] adds the elements of the list [l] into the heap [h]. An element occurring several times will be added that many times to the heap. - Complexity: [O(n log (m+n))] + Elements need not be given in any particular order. + Complexity: [O(log m + n)] where [m] and [n] are the number of elements in [h] and [l], respectively. @since 0.16 *) @@ -136,7 +137,8 @@ module type S = sig val of_list : elt list -> t (** [of_list l] builds a heap from a given list of elements. It is equivalent to [add_list empty l]. - Complexity: [O(n log n)]. + Elements need not be given in any particular order. + Complexity: [O(n)]. 
*) val of_iter : elt iter -> t From 3f95fd44e631d93075f39ee5183c4b41731398cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Glen=20M=C3=A9vel?= Date: Sat, 27 Jul 2024 14:29:54 +0200 Subject: [PATCH 07/14] perf/CCHeap: filter, delete_all in O(n) and ensure (==) - for `delete_all` this is a bugfix (physical equality was documented but not implemented) - `delete_one` is unchanged, it already had complexity O(n) and ensured physical equality --- CHANGELOG.md | 3 ++ src/core/CCHeap.ml | 122 +++++++++++++++++++++++++++++--------------- src/core/CCHeap.mli | 5 +- 3 files changed, 86 insertions(+), 44 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6ad8afe7f..00f34f5f2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,9 @@ - perf: `CCHeap`: building a heap from n elements is now in time O(n) instead of O(n log n) +- perf: `CCHeap`: `filter` and `delete_all` are now in time O(n) + instead of O(n log n), and they ensure physical equality + (for `delete_all` this is a bugfix) ## 3.13.1 diff --git a/src/core/CCHeap.ml b/src/core/CCHeap.ml index 7042c80de..23c2cd418 100644 --- a/src/core/CCHeap.ml +++ b/src/core/CCHeap.ml @@ -91,12 +91,13 @@ module type S = sig Elements are identified by the equality function [eq]. By contrast with {!filter}, [delete_all] stops as soon as it enters a subtree whose root is greater than [x]. - Complexity: [O(n log n)]. + Complexity: [O(n)]. @since 2.0 *) val filter : (elt -> bool) -> t -> t (** [filter p h] filters values, only retaining the ones that satisfy the predicate [p]. - Complexity: [O(n log n)]. + If no element in [h] satisfies [p], then [h] itself is returned. + Complexity: [O(n)]. 
*) val iter : (elt -> unit) -> t -> unit @@ -279,45 +280,6 @@ module Make (E : PARTIAL_ORD) : S with type elt = E.t = struct | E -> raise Empty | N (_, x, l, r) -> merge l r, x - let delete_one eq x h = - let rec aux = function - | E -> false, E - | N (_, y, l, r) as h -> - if eq x y then - true, merge l r - else if E.leq y x then ( - let found_left, l1 = aux l in - let found, r1 = - if found_left then - true, r - else - aux r - in - if found then - true, _make_node y l1 r1 - else - false, h - ) else - false, h - in - snd (aux h) - - let rec delete_all eq x = function - | E -> E - | N (_, y, l, r) as h -> - if eq x y then - merge (delete_all eq x l) (delete_all eq x r) - else if E.leq y x then - _make_node y (delete_all eq x l) (delete_all eq x r) - else - h - - let rec filter p h = - match h with - | E -> E - | N (_, x, l, r) when p x -> _make_node x (filter p l) (filter p r) - | N (_, _, l, r) -> merge (filter p l) (filter p r) - let rec iter f h = match h with | E -> () @@ -353,7 +315,9 @@ module Make (E : PARTIAL_ORD) : S with type elt = E.t = struct is in time O(log n), this runs in time O(k + k*log(N/k)). Notice that this is a O(k + N) (if k is small wrt. N, this last upper bound is very loose). The code below uses additional space of only O(log(k)) at any moment; - it avoids storing an intermediate list of length O(k). *) + it avoids storing an intermediate list of length O(k). + When at most one of the input heaps is non-empty, the result is physically + equal to it. 
*) let _merge_heap_iter (hs : t iter) : t = let rec cons_and_merge h0 hs weights = begin match hs with @@ -462,6 +426,80 @@ module Make (E : PARTIAL_ORD) : S with type elt = E.t = struct | E -> `Nil | N (_, x, l, r) -> `Node (x, [ to_tree l; to_tree r ]) + (** {2 Filtering} *) + + let delete_one eq x h = + let rec aux = function + | E -> false, E + | N (_, y, l, r) as h -> + if eq x y then + true, merge l r + else if E.leq y x then ( + let found_left, l1 = aux l in + let found, r1 = + if found_left then + true, r + else + aux r + in + if found then + true, _make_node y l1 r1 + else + false, h + ) else + false, h + in + snd (aux h) + + + let delete_all eq x0 h = + (* Iterates [k] on sub-heaps of [h] whose merger is equal to [h] minus + the deleted elements [x0]; we do this, instead of merging the subheaps + directly, in order to ensure complexity O(n). + When no element is deleted, the iterator does nothing and the function + returns true; this makes sure that the result shares sub-heaps with the + input as much as possible, and ensures physical equality when no element + is deleted. 
*) + let rec iter_subheaps eq x0 h k = + begin match h with + | N (_, x, l, r) when E.leq x x0 -> + let keep_x = not (eq x0 x) in + let keep_l = iter_subheaps eq x0 l k in + let keep_r = iter_subheaps eq x0 r k in + if keep_x && keep_l && keep_r then + true + else begin + if keep_x then k (singleton x) ; + if keep_l then k l ; + if keep_r then k r ; + false + end + | _ -> true + end + in + _merge_heap_iter (fun k -> if iter_subheaps eq x0 h k then k h) + + let filter p h = + (* similar to [delete_all] *) + let rec iter_subheaps p k h = + begin match h with + | E -> true + | N (_, x, l, r) -> + let keep_x = p x in + let keep_l = iter_subheaps p k l in + let keep_r = iter_subheaps p k r in + if keep_x && keep_l && keep_r then + true + else begin + if keep_x then k (singleton x) ; + if keep_l then k l ; + if keep_r then k r ; + false + end + end + in + _merge_heap_iter (fun k -> if iter_subheaps p k h then k h) + (** {2 Pretty-printing} *) let to_string ?(sep = ",") elt_to_string h = diff --git a/src/core/CCHeap.mli b/src/core/CCHeap.mli index dab40bda2..fe40aa191 100644 --- a/src/core/CCHeap.mli +++ b/src/core/CCHeap.mli @@ -87,12 +87,13 @@ module type S = sig Elements are identified by the equality function [eq]. By contrast with {!filter}, [delete_all] stops as soon as it enters a subtree whose root is greater than [x]. - Complexity: [O(n log n)]. + Complexity: [O(n)]. @since 2.0 *) val filter : (elt -> bool) -> t -> t (** [filter p h] filters values, only retaining the ones that satisfy the predicate [p]. - Complexity: [O(n log n)]. + If no element in [h] satisfies [p], then [h] itself is returned. + Complexity: [O(n)]. 
*) val iter : (elt -> unit) -> t -> unit From 6c810eb83d0778c136e95037ace41054d2fa6ecf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Glen=20M=C3=A9vel?= Date: Sat, 27 Jul 2024 14:35:27 +0200 Subject: [PATCH 08/14] doc/CCHeap: document (==) for merge --- src/core/CCHeap.ml | 1 + src/core/CCHeap.mli | 1 + 2 files changed, 2 insertions(+) diff --git a/src/core/CCHeap.ml b/src/core/CCHeap.ml index 23c2cd418..984d4a09a 100644 --- a/src/core/CCHeap.ml +++ b/src/core/CCHeap.ml @@ -46,6 +46,7 @@ module type S = sig val merge : t -> t -> t (** [merge h1 h2] merges the two heaps [h1] and [h2]. + If one heap is empty, the result is physically equal to the other heap. Complexity: [O(log (m+n))] where [m] and [n] are the number of elements in each heap. *) diff --git a/src/core/CCHeap.mli b/src/core/CCHeap.mli index fe40aa191..79bbe32d5 100644 --- a/src/core/CCHeap.mli +++ b/src/core/CCHeap.mli @@ -43,6 +43,7 @@ module type S = sig val merge : t -> t -> t (** [merge h1 h2] merges the two heaps [h1] and [h2]. + If one heap is empty, the result is physically equal to the other heap. Complexity: [O(log (m+n))] where [m] and [n] are the number of elements in each heap. 
*) From fdfc806afbab4cc7690cb2b73a37060f502fca9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Glen=20M=C3=A9vel?= Date: Sat, 27 Jul 2024 14:50:28 +0200 Subject: [PATCH 09/14] CCHeap: avoid boxing in delete_one --- src/core/CCHeap.ml | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/src/core/CCHeap.ml b/src/core/CCHeap.ml index 984d4a09a..32120c6b2 100644 --- a/src/core/CCHeap.ml +++ b/src/core/CCHeap.ml @@ -429,29 +429,22 @@ module Make (E : PARTIAL_ORD) : S with type elt = E.t = struct (** {2 Filtering} *) - let delete_one eq x h = - let rec aux = function - | E -> false, E - | N (_, y, l, r) as h -> - if eq x y then - true, merge l r - else if E.leq y x then ( - let found_left, l1 = aux l in - let found, r1 = - if found_left then - true, r + let rec delete_one eq x0 = function + | N (_, x, l, r) as h when E.leq x x0 -> + if eq x0 x then + merge l r + else begin + let l' = delete_one eq x0 l in + if CCEqual.physical l' l then + let r' = delete_one eq x0 r in + if CCEqual.physical r' r then + h else - aux r - in - if found then - true, _make_node y l1 r1 + _make_node x l r' else - false, h - ) else - false, h - in - snd (aux h) - + _make_node x l' r + end + | h -> h let delete_all eq x0 h = (* Iterates [k] on sub-heaps of [h] whose merger is equal to [h] minus From 78e67a9f4af804e0bf7533160a258cfff38ca8c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Glen=20M=C3=A9vel?= Date: Sat, 27 Jul 2024 17:22:42 +0200 Subject: [PATCH 10/14] tests/CCHeap: improve existing tests - label all tests - decouple tests about different heap functions - random instances now have better coverage of possible cases: + more variability in size (previously, some tests were limited to a fixed size) + high probability of duplicates (previously, the probability of duplicates was negligible, because elements were drawn uniformly from the full `int` range) - the test for `of_list, take_exn` is now more precise (added a duplicate element) - the test 
for `to_list_sorted` is now more precise (checks that the resulting list is what we want, instead of just checking that it is sorted) - the test for `filter` is now more precise (also checks that no element has been spuriously dropped) - more uniform style for easier reading, using `|>` --- tests/core/t_heap.ml | 157 ++++++++++++++++++++++++------------------- 1 file changed, 86 insertions(+), 71 deletions(-) diff --git a/tests/core/t_heap.ml b/tests/core/t_heap.ml index 6c2ee2ed7..9a3e96a0e 100644 --- a/tests/core/t_heap.ml +++ b/tests/core/t_heap.ml @@ -2,106 +2,121 @@ open CCHeap module T = (val Containers_testlib.make ~__FILE__ ()) include T +(* A QCheck generator for natural numbers that are not too large (larger than + * [small_nat] but smaller than [big_nat]), with a bias towards smaller numbers. + * This also happens to be what QCheck uses for picking a length for a list + * generated by [QCheck.list]. + * QCheck defines this generator under the name [nat] but does not expose it. *) +let medium_nat = + Q.make ~print:Q.Print.int ~shrink:Q.Shrink.int ~small:(fun _ -> 1) + (fun st -> + let p = Random.State.float st 1. 
in + if p < 0.5 then Random.State.int st 10 + else if p < 0.75 then Random.State.int st 100 + else if p < 0.95 then Random.State.int st 1_000 + else Random.State.int st 10_000 + ) + module H = CCHeap.Make (struct type t = int - let leq x y = x <= y end) -let rec is_sorted l = - match l with - | [ _ ] | [] -> true - | x :: (y :: _ as l') -> x <= y && is_sorted l' +;; -let extract_list = H.to_list_sorted;; +t ~name:"of_list, take_exn" @@ fun () -> + let h = H.of_list [ 5; 4; 3; 4; 1; 42; 0 ] in + let h, x = H.take_exn h in + assert_equal ~printer:string_of_int 0 x; + let h, x = H.take_exn h in + assert_equal ~printer:string_of_int 1 x; + let h, x = H.take_exn h in + assert_equal ~printer:string_of_int 3 x; + let h, x = H.take_exn h in + assert_equal ~printer:string_of_int 4 x; + let h, x = H.take_exn h in + assert_equal ~printer:string_of_int 4 x; + let h, x = H.take_exn h in + assert_equal ~printer:string_of_int 5 x; + let h, x = H.take_exn h in + assert_equal ~printer:string_of_int 42 x; + assert_raises ((=) H.Empty) (fun () -> H.take_exn h); + true +;; -t @@ fun () -> -let h = H.of_list [ 5; 3; 4; 1; 42; 0 ] in -let h, x = H.take_exn h in -assert_equal ~printer:string_of_int 0 x; -let h, x = H.take_exn h in -assert_equal ~printer:string_of_int 1 x; -let h, x = H.take_exn h in -assert_equal ~printer:string_of_int 3 x; -let h, x = H.take_exn h in -assert_equal ~printer:string_of_int 4 x; -let h, x = H.take_exn h in -assert_equal ~printer:string_of_int 5 x; -let h, x = H.take_exn h in -assert_equal ~printer:string_of_int 42 x; -assert_raises - (function - | H.Empty -> true - | _ -> false) - (fun () -> H.take_exn h); -true +q ~name:"of_list, to_list" + ~count:30 + Q.(list medium_nat) + (fun l -> + (l |> H.of_list |> H.to_list |> List.sort CCInt.compare) + = (l |> List.sort CCInt.compare)) ;; -q ~count:30 - Q.(list_of_size Gen.(return 1_000) int) +q ~name:"of_list, to_list_sorted" + ~count:30 + Q.(list medium_nat) + (fun l -> + (l |> H.of_list |> H.to_list_sorted) + = 
(l |> List.sort CCInt.compare)) +;; + +(* The remaining tests assume the correctness of + [of_list], [to_list], [to_list_sorted]. *) + +q ~name:"size" + ~count:30 + Q.(list_of_size Gen.small_nat medium_nat) (fun l -> - (* put elements into a heap *) - let h = H.of_iter (Iter.of_list l) in - assert_equal 1_000 (H.size h); - let l' = extract_list h in - is_sorted l') + (l |> H.of_list |> H.size) + = (l |> List.length)) ;; -(* test filter *) -q ~count:30 - Q.(list_of_size Gen.(return 1_000) int) +q ~name:"filter" + Q.(list medium_nat) (fun l -> - (* put elements into a heap *) - let h = H.of_iter (Iter.of_list l) in - let h = H.filter (fun x -> x mod 2 = 0) h in - assert (H.to_iter h |> Iter.for_all (fun x -> x mod 2 = 0)); - let l' = extract_list h in - is_sorted l') + let p = (fun x -> x mod 2 = 0) in + let l' = l |> H.of_list |> H.filter p |> H.to_list in + List.for_all p l' && List.length l' = List.length (List.filter p l)) ;; -q - Q.(list_of_size Gen.(return 1_000) int) +q ~name:"to_iter_sorted" + Q.(list_of_size Gen.small_nat medium_nat) (fun l -> - (* put elements into a heap *) - let h = H.of_iter (Iter.of_list l) in - let l' = H.to_iter_sorted h |> Iter.to_list in - is_sorted l') + (l |> H.of_list |> H.to_iter_sorted |> Iter.to_list) + = (l |> List.sort CCInt.compare)) ;; -q - Q.(list int) +q ~name:"of_gen" + Q.(list_of_size Gen.small_nat medium_nat) (fun l -> - extract_list (H.of_list l) = extract_list (H.of_gen (CCList.to_gen l))) + (l |> CCList.to_gen |> H.of_gen |> H.to_list_sorted) + = (l |> List.sort CCInt.compare)) ;; -q - Q.(list int) +q ~name:"to_gen" + Q.(list_of_size Gen.small_nat medium_nat) (fun l -> - let h = H.of_list l in - H.to_gen h |> CCList.of_gen |> List.sort Stdlib.compare - = (H.to_list h |> List.sort Stdlib.compare)) + (l |> H.of_list |> H.to_gen |> CCList.of_gen |> List.sort CCInt.compare) + = (l |> List.sort CCInt.compare)) ;; -q - Q.(list int) +q ~name:"to_string with default sep" + Q.(list_of_size Gen.small_nat medium_nat) (fun l 
-> - let h = H.of_list l in - H.to_string string_of_int h - = (List.sort Stdlib.compare l |> List.map string_of_int |> String.concat ",")) + (l |> H.of_list |> H.to_string string_of_int) + = (l |> List.sort CCInt.compare |> List.map string_of_int |> String.concat ",")) ;; -q - Q.(list int) +q ~name:"to_string with space as sep" + Q.(list_of_size Gen.small_nat medium_nat) (fun l -> - let h = H.of_list l in - H.to_string ~sep:" " string_of_int h - = (List.sort Stdlib.compare l |> List.map string_of_int |> String.concat " ")) + (l |> H.of_list |> H.to_string ~sep:" " string_of_int) + = (l |> List.sort CCInt.compare |> List.map string_of_int |> String.concat " ")) ;; -q - Q.(list_of_size Gen.(return 1_000) int) +q ~name:"Make_from_compare" + Q.(list_of_size Gen.small_nat medium_nat) (fun l -> let module H' = Make_from_compare (CCInt) in - let h = H'.of_list l in - let l' = H'.to_list_sorted h in - is_sorted l') + (l |> H'.of_list |> H'.to_list_sorted) + = (l |> List.sort CCInt.compare)) From 92676f5513bc70535825c105ed4cf2284d999dcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Glen=20M=C3=A9vel?= Date: Sat, 27 Jul 2024 17:31:32 +0200 Subject: [PATCH 11/14] tests/CCHeap: reorder tests --- tests/core/t_heap.ml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/core/t_heap.ml b/tests/core/t_heap.ml index 9a3e96a0e..82487e364 100644 --- a/tests/core/t_heap.ml +++ b/tests/core/t_heap.ml @@ -79,24 +79,24 @@ q ~name:"filter" List.for_all p l' && List.length l' = List.length (List.filter p l)) ;; -q ~name:"to_iter_sorted" +q ~name:"of_gen" Q.(list_of_size Gen.small_nat medium_nat) (fun l -> - (l |> H.of_list |> H.to_iter_sorted |> Iter.to_list) + (l |> CCList.to_gen |> H.of_gen |> H.to_list_sorted) = (l |> List.sort CCInt.compare)) ;; -q ~name:"of_gen" +q ~name:"to_gen" Q.(list_of_size Gen.small_nat medium_nat) (fun l -> - (l |> CCList.to_gen |> H.of_gen |> H.to_list_sorted) + (l |> H.of_list |> H.to_gen |> CCList.of_gen |> List.sort CCInt.compare) 
= (l |> List.sort CCInt.compare)) ;; -q ~name:"to_gen" +q ~name:"to_iter_sorted" Q.(list_of_size Gen.small_nat medium_nat) (fun l -> - (l |> H.of_list |> H.to_gen |> CCList.of_gen |> List.sort CCInt.compare) + (l |> H.of_list |> H.to_iter_sorted |> Iter.to_list) = (l |> List.sort CCInt.compare)) ;; From 13db1c31e9c4f6373ba36ffe97b204e6e9ebcfd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Glen=20M=C3=A9vel?= Date: Sat, 27 Jul 2024 18:10:56 +0200 Subject: [PATCH 12/14] tests/CCHeap: add tests --- tests/core/t_heap.ml | 120 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 119 insertions(+), 1 deletion(-) diff --git a/tests/core/t_heap.ml b/tests/core/t_heap.ml index 82487e364..cd47f2f33 100644 --- a/tests/core/t_heap.ml +++ b/tests/core/t_heap.ml @@ -17,6 +17,16 @@ let medium_nat = else Random.State.int st 10_000 ) +let list_delete_first (x0 : int) (xs : int list) : int list = + let rec aux acc xs = + begin match xs with + | [] -> List.rev acc + | x :: xs' when x = x0 -> List.rev_append acc xs' + | x :: xs' -> aux (x :: acc) xs' + end + in + aux [] xs + module H = CCHeap.Make (struct type t = int let leq x y = x <= y @@ -24,22 +34,30 @@ end) ;; -t ~name:"of_list, take_exn" @@ fun () -> +t ~name:"of_list, find_min_exn, take_exn" @@ fun () -> let h = H.of_list [ 5; 4; 3; 4; 1; 42; 0 ] in + assert_equal ~printer:string_of_int 0 (H.find_min_exn h); let h, x = H.take_exn h in assert_equal ~printer:string_of_int 0 x; + assert_equal ~printer:string_of_int 1 (H.find_min_exn h); let h, x = H.take_exn h in assert_equal ~printer:string_of_int 1 x; + assert_equal ~printer:string_of_int 3 (H.find_min_exn h); let h, x = H.take_exn h in assert_equal ~printer:string_of_int 3 x; + assert_equal ~printer:string_of_int 4 (H.find_min_exn h); let h, x = H.take_exn h in assert_equal ~printer:string_of_int 4 x; + assert_equal ~printer:string_of_int 4 (H.find_min_exn h); let h, x = H.take_exn h in assert_equal ~printer:string_of_int 4 x; + assert_equal ~printer:string_of_int 5 
(H.find_min_exn h); let h, x = H.take_exn h in assert_equal ~printer:string_of_int 5 x; + assert_equal ~printer:string_of_int 42 (H.find_min_exn h); let h, x = H.take_exn h in assert_equal ~printer:string_of_int 42 x; + assert_raises ((=) H.Empty) (fun () -> H.find_min_exn h); assert_raises ((=) H.Empty) (fun () -> H.take_exn h); true ;; @@ -71,6 +89,41 @@ q ~name:"size" = (l |> List.length)) ;; +q ~name:"insert" + Q.(pair medium_nat (list medium_nat)) + (fun (x, l) -> + (l |> H.of_list |> H.insert x |> H.to_list_sorted) + = ((x::l) |> List.sort CCInt.compare)) +;; + +q ~name:"merge" + Q.(pair (list medium_nat) (list medium_nat)) + (fun (l1, l2) -> + (H.merge (H.of_list l1) (H.of_list l2) |> H.to_list_sorted) + = ((l1@l2) |> List.sort CCInt.compare)) +;; + +q ~name:"add_list" + Q.(pair (list medium_nat) (list medium_nat)) + (fun (l1, l2) -> + (H.add_list (H.of_list l1) l2 |> H.to_list_sorted) + = ((l1@l2) |> List.sort CCInt.compare)) +;; + +q ~name:"delete_one" + Q.(pair medium_nat (list medium_nat)) + (fun (x, l) -> + (l |> H.of_list |> H.delete_one (=) x |> H.to_list_sorted) + = (l |> list_delete_first x |> List.sort CCInt.compare)) +;; + +q ~name:"delete_all" + Q.(pair medium_nat (list medium_nat)) + (fun (x, l) -> + (l |> H.of_list |> H.delete_all (=) x |> H.to_list_sorted) + = (l |> List.filter ((<>) x) |> List.sort CCInt.compare)) +;; + q ~name:"filter" Q.(list medium_nat) (fun l -> @@ -79,6 +132,50 @@ q ~name:"filter" List.for_all p l' && List.length l' = List.length (List.filter p l)) ;; +t ~name:"physical equality" @@ fun () -> + let h = H.of_list [ 5; 4; 3; 4; 1; 42; 0 ] in + assert_bool "physical equality of merge with left empty" + (CCEqual.physical h (H.merge H.empty h)) ; + assert_bool "physical equality of merge with right empty" + (CCEqual.physical h (H.merge h H.empty)) ; + assert_bool "physical equality of delete_one with element lesser than min" + (CCEqual.physical h (H.delete_one (=) (-999) h)) ; + assert_bool "physical equality of delete_one 
with element between min and max" + (CCEqual.physical h (H.delete_one (=) 2 h)) ; + assert_bool "physical equality of delete_one with element greater than max" + (CCEqual.physical h (H.delete_one (=) 999 h)) ; + assert_bool "physical equality of delete_all with element lesser than min" + (CCEqual.physical h (H.delete_all (=) (-999) h)) ; + assert_bool "physical equality of delete_all with element between min and max" + (CCEqual.physical h (H.delete_all (=) 2 h)) ; + assert_bool "physical equality of delete_all with element greater than max" + (CCEqual.physical h (H.delete_all (=) 999 h)) ; + assert_bool "physical equality of filter" + (CCEqual.physical h (H.filter (fun _ -> true) h)) ; + true +;; + +q ~name:"fold" + Q.(list_of_size Gen.small_nat medium_nat) + (fun l -> + (l |> H.of_list |> H.fold (+) 0) + = (l |> List.fold_left (+) 0)) +;; + +q ~name:"of_iter" + Q.(list_of_size Gen.small_nat medium_nat) + (fun l -> + (l |> CCList.to_iter |> H.of_iter |> H.to_list_sorted) + = (l |> List.sort CCInt.compare)) +;; + +q ~name:"of_seq" + Q.(list_of_size Gen.small_nat medium_nat) + (fun l -> + (l |> CCList.to_seq |> H.of_seq |> H.to_list_sorted) + = (l |> List.sort CCInt.compare)) +;; + q ~name:"of_gen" Q.(list_of_size Gen.small_nat medium_nat) (fun l -> @@ -86,6 +183,20 @@ q ~name:"of_gen" = (l |> List.sort CCInt.compare)) ;; +q ~name:"to_iter" + Q.(list_of_size Gen.small_nat medium_nat) + (fun l -> + (l |> H.of_list |> H.to_iter |> CCList.of_iter |> List.sort CCInt.compare) + = (l |> List.sort CCInt.compare)) +;; + +q ~name:"to_seq" + Q.(list_of_size Gen.small_nat medium_nat) + (fun l -> + (l |> H.of_list |> H.to_seq |> CCList.of_seq |> List.sort CCInt.compare) + = (l |> List.sort CCInt.compare)) +;; + q ~name:"to_gen" Q.(list_of_size Gen.small_nat medium_nat) (fun l -> @@ -100,6 +211,13 @@ q ~name:"to_iter_sorted" = (l |> List.sort CCInt.compare)) ;; +q ~name:"to_seq_sorted" + Q.(list_of_size Gen.small_nat medium_nat) + (fun l -> + (l |> H.of_list |> H.to_seq_sorted |> 
CCList.of_seq |> List.sort CCInt.compare) + = (l |> List.sort CCInt.compare)) +;; + q ~name:"to_string with default sep" Q.(list_of_size Gen.small_nat medium_nat) (fun l -> From a24e1f7472c6cb5f377817dcd29deb6fd0571f61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Glen=20M=C3=A9vel?= Date: Wed, 31 Jul 2024 16:02:19 +0200 Subject: [PATCH 13/14] doc/CCHeap: fix English, improve wording, add sections --- src/core/CCHeap.ml | 63 ++++++++++++++++++++++++-------------- src/core/CCHeap.mli | 73 ++++++++++++++++++++++++++------------------- 2 files changed, 84 insertions(+), 52 deletions(-) diff --git a/src/core/CCHeap.ml b/src/core/CCHeap.ml index 32120c6b2..28fc9e835 100644 --- a/src/core/CCHeap.ml +++ b/src/core/CCHeap.ml @@ -36,14 +36,16 @@ module type S = sig type elt type t + exception Empty + + (** {2 Basic heap operations} *) + val empty : t (** Empty heap. *) val is_empty : t -> bool (** Is the heap empty? *) - exception Empty - val merge : t -> t -> t (** [merge h1 h2] merges the two heaps [h1] and [h2]. If one heap is empty, the result is physically equal to the other heap. @@ -59,24 +61,35 @@ module type S = sig (** [add h x] is [insert x h]. *) val find_min : t -> elt option - (** [find_min h] find the minimal element of the heap [h]. + (** [find_min h] returns the minimal element of [h], + or [None] if [h] is empty. Complexity: [O(1)]. *) val find_min_exn : t -> elt - (** [find_min_exn h] is like {!find_min} but can fail. + (** [find_min_exn h] is akin to {!find_min}, + but it raises {!Empty} when the heap is empty. @raise Empty if the heap is empty. *) val take : t -> (t * elt) option - (** [take h] extracts and returns the minimum element, and the new heap (without - this element), or [None] if the heap [h] is empty. + (** [take h] returns the minimum element of [h] + and the new heap without this element, + or [None] if [h] is empty. Complexity: [O(log n)]. *) val take_exn : t -> t * elt - (** [take_exn h] is like {!take}, but can fail. 
+ (** [take_exn h] is akin to {!take}, + but it raises {!Empty} when the heap is empty. @raise Empty if the heap is empty. *) + val size : t -> int + (** [size h] is the number of elements in the heap [h]. + Complexity: [O(n)]. + *) + + (** {2 Deleting elements} *) + val delete_one : (elt -> elt -> bool) -> elt -> t -> t (** [delete_one eq x h] deletes an occurrence of the value [x] from the heap [h], @@ -90,27 +103,25 @@ module type S = sig (** [delete_all eq x h] deletes all occurrences of the value [x] from the heap [h]. If [h] does not contain [x], then [h] itself is returned. Elements are identified by the equality function [eq]. - By contrast with {!filter}, [delete_all] stops as soon as - it enters a subtree whose root is greater than [x]. + This function is more efficient than {!filter} + because it avoids considering elements greater than [x]. Complexity: [O(n)]. @since 2.0 *) val filter : (elt -> bool) -> t -> t - (** [filter p h] filters values, only retaining the ones that satisfy the predicate [p]. + (** [filter p h] filters the elements of [h], + only retaining those that satisfy the predicate [p]. If no element in [h] satisfies [p], then [h] itself is returned. Complexity: [O(n)]. *) + (** {2 Iterating on elements} *) + val iter : (elt -> unit) -> t -> unit - (** [iter f h] iterates over the heap [h] invoking [f] with the current element. *) + (** [iter f h] invokes [f] on every element of the heap [h]. *) val fold : ('a -> elt -> 'a) -> 'a -> t -> 'a - (** [fold f acc h] folds on all values of [h]. *) - - val size : t -> int - (** [size h] is the number of elements in the heap [h]. - Complexity: [O(n)]. - *) + (** [fold f acc h] folds on all elements of [h]. *) (** {2 Adding many elements at once} *) @@ -118,6 +129,7 @@ module type S = sig (** [add_list h l] adds the elements of the list [l] into the heap [h]. An element occurring several times will be added that many times to the heap. Elements need not be given in any particular order. 
+ This function is more efficient than repeated insertions. Complexity: [O(log m + n)] where [m] and [n] are the number of elements in [h] and [l], respectively. @since 0.16 *) @@ -141,9 +153,10 @@ module type S = sig (** {2 Conversions} *) val of_list : elt list -> t - (** [of_list l] builds a heap from a given list of elements. - It is equivalent to [add_list empty l]. + (** [of_list l] builds a heap from the list of elements [l]. Elements need not be given in any particular order. + This function is more efficient than repeated insertions. + It is equivalent to [add_list empty l]. Complexity: [O(n)]. *) @@ -155,7 +168,7 @@ module type S = sig val of_seq : elt Seq.t -> t (** [of_seq seq] is akin to {!of_list}, but taking a [Seq.t] of elements as input. - Renamed from [of_seq] since 3.0. + Renamed from [of_std_seq] since 3.0. @since 3.0 *) val of_gen : elt gen -> t @@ -173,7 +186,7 @@ module type S = sig @since 2.8 *) val to_seq : t -> elt Seq.t - (** [to_seq h] is akin to {!to_list}, but returning a [Seq.t] of elements + (** [to_seq h] is akin to {!to_list}, but returning a [Seq.t] of elements. Renamed from [to_std_seq] since 3.0. @since 3.0 *) @@ -453,7 +466,13 @@ module Make (E : PARTIAL_ORD) : S with type elt = E.t = struct When no element is deleted, the iterator does nothing and the function returns true; this makes sure that the result shares sub-heaps with the input as much as possible, and ensures physical equality when no element - is deleted. *) + is deleted. + In [delete_all], by contrast with [filter], we can avoid considering + elements greater than [x0]. As a consequence, the complexity is more + precisely O(k + k log(n/k)), where k is the number of elements not + greater than [x0]. This is a O(n), but it is also a O(k log n), which is + much smaller than O(n) if k is asymptotically smaller than n. 
+ *) let rec iter_subheaps eq x0 h k = begin match h with | N (_, x, l, r) when E.leq x x0 -> diff --git a/src/core/CCHeap.mli b/src/core/CCHeap.mli index 79bbe32d5..a496998a6 100644 --- a/src/core/CCHeap.mli +++ b/src/core/CCHeap.mli @@ -33,13 +33,15 @@ module type S = sig type elt type t + exception Empty + + (** {2 Basic heap operations} *) + val empty : t (** [empty] returns the empty heap. *) val is_empty : t -> bool - (** [is_empty h] returns [true] if the heap [h] is empty. *) - - exception Empty + (** [is_empty h] returns [true] iff the heap [h] is empty. *) val merge : t -> t -> t (** [merge h1 h2] merges the two heaps [h1] and [h2]. @@ -56,24 +58,35 @@ module type S = sig (** [add h x] is [insert x h]. *) val find_min : t -> elt option - (** [find_min h] find the minimal element of the heap [h]. + (** [find_min h] returns the minimal element of [h], + or [None] if [h] is empty. Complexity: [O(1)]. *) val find_min_exn : t -> elt - (** [find_min_exn h] is like {!find_min} but can fail. + (** [find_min_exn h] is akin to {!find_min}, + but it raises {!Empty} when the heap is empty. @raise Empty if the heap is empty. *) val take : t -> (t * elt) option - (** [take h] extracts and returns the minimum element, and the new heap (without - this element), or [None] if the heap [h] is empty. + (** [take h] returns the minimum element of [h] + and the new heap without this element, + or [None] if [h] is empty. Complexity: [O(log n)]. *) val take_exn : t -> t * elt - (** [take_exn h] is like {!take}, but can fail. + (** [take_exn h] is akin to {!take}, + but it raises {!Empty} when the heap is empty. @raise Empty if the heap is empty. *) + val size : t -> int + (** [size h] is the number of elements in the heap [h]. + Complexity: [O(n)]. + *) + + (** {2 Deleting elements} *) + val delete_one : (elt -> elt -> bool) -> elt -> t -> t (** [delete_one eq x h] deletes an occurrence of the value [x] from the heap [h], if there is some. 
@@ -86,27 +99,25 @@ module type S = sig (** [delete_all eq x h] deletes all occurrences of the value [x] from the heap [h]. If [h] does not contain [x], then [h] itself is returned. Elements are identified by the equality function [eq]. - By contrast with {!filter}, [delete_all] stops as soon as - it enters a subtree whose root is greater than [x]. + This function is more efficient than {!filter} + because it avoids considering elements greater than [x]. Complexity: [O(n)]. @since 2.0 *) val filter : (elt -> bool) -> t -> t - (** [filter p h] filters values, only retaining the ones that satisfy the predicate [p]. + (** [filter p h] filters the elements of [h], + only retaining those that satisfy the predicate [p]. If no element in [h] satisfies [p], then [h] itself is returned. Complexity: [O(n)]. *) + (** {2 Iterating on elements} *) + val iter : (elt -> unit) -> t -> unit - (** [iter f h] iterates over the heap [h] invoking [f] with the current element. *) + (** [iter f h] invokes [f] on every element of the heap [h]. *) val fold : ('a -> elt -> 'a) -> 'a -> t -> 'a - (** [fold f acc h] folds on all values of [h]. *) - - val size : t -> int - (** [size h] is the number of elements in the heap [h]. - Complexity: [O(n)]. - *) + (** [fold f acc h] folds on all elements of [h]. *) (** {2 Adding many elements at once} *) @@ -114,13 +125,14 @@ module type S = sig (** [add_list h l] adds the elements of the list [l] into the heap [h]. An element occurring several times will be added that many times to the heap. Elements need not be given in any particular order. + This function is more efficient than repeated insertions. Complexity: [O(log m + n)] where [m] and [n] are the number of elements in [h] and [l], respectively. @since 0.16 *) val add_iter : t -> elt iter -> t (** [add_iter h iter] is akin to {!add_list}, - but taking an [iter] of elements as input. + but taking an {!type:iter} of elements as input. 
@since 2.8 *) val add_seq : t -> elt Seq.t -> t @@ -131,32 +143,33 @@ module type S = sig val add_gen : t -> elt gen -> t (** [add_gen h gen] is akin to {!add_list}, - but taking a [gen] of elements as input. + but taking a {!type:gen} of elements as input. @since 0.16 *) (** {2 Conversions} *) val of_list : elt list -> t - (** [of_list l] builds a heap from a given list of elements. - It is equivalent to [add_list empty l]. + (** [of_list l] builds a heap from the list of elements [l]. Elements need not be given in any particular order. + This function is more efficient than repeated insertions. + It is equivalent to {!add_list}[ empty l]. Complexity: [O(n)]. *) val of_iter : elt iter -> t (** [of_iter iter] is akin to {!of_list}, - but taking an [iter] of elements as input. + but taking an {!type:iter} of elements as input. @since 2.8 *) val of_seq : elt Seq.t -> t (** [of_seq seq] is akin to {!of_list}, but taking a [Seq.t] of elements as input. - Renamed from [of_seq] since 3.0. + Renamed from [of_std_seq] since 3.0. @since 3.0 *) val of_gen : elt gen -> t (** [of_gen gen] is akin to {!of_list}, - but taking a [gen] of elements as input. *) + but taking a {!type:gen} of elements as input. *) val to_list : t -> elt list (** [to_list h] returns a list of the elements of the heap [h], @@ -165,16 +178,16 @@ module type S = sig *) val to_iter : t -> elt iter - (** [to_iter h] is akin to {!to_list}, but returning an [iter] of elements. + (** [to_iter h] is akin to {!to_list}, but returning an {!type:iter} of elements. @since 2.8 *) val to_seq : t -> elt Seq.t - (** [to_seq h] is akin to {!to_list}, but returning a [Seq.t] of elements + (** [to_seq h] is akin to {!to_list}, but returning a [Seq.t] of elements. Renamed from [to_std_seq] since 3.0. @since 3.0 *) val to_gen : t -> elt gen - (** [to_gen h] is akin to {!to_list}, but returning a [gen] of elements. *) + (** [to_gen h] is akin to {!to_list}, but returning a {!type:gen} of elements. 
*) val to_list_sorted : t -> elt list (** [to_list_sorted h] returns the list of elements of the heap [h] @@ -184,7 +197,7 @@ module type S = sig val to_iter_sorted : t -> elt iter (** [to_iter_sorted h] is akin to {!to_list_sorted}, - but returning an [iter] of elements. + but returning an {!type:iter} of elements. @since 2.8 *) val to_seq_sorted : t -> elt Seq.t @@ -194,7 +207,7 @@ module type S = sig @since 3.0 *) val to_tree : t -> elt ktree - (** [to_tree h] returns a [ktree] of the elements of the heap [h]. + (** [to_tree h] returns a {!type:ktree} of the elements of the heap [h]. The layout is not specified. Complexity: [O(n)]. *) From 5d315503e11840de60bf60c059e32643bcc88576 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Glen=20M=C3=A9vel?= Date: Fri, 9 Aug 2024 14:45:22 +0200 Subject: [PATCH 14/14] CCHeap: building from almost-sorted sequences --- CHANGELOG.md | 1 + src/core/CCHeap.ml | 50 +++++++++++++++++++++++++++++++++++++++++++++ src/core/CCHeap.mli | 23 +++++++++++++++++++++ 3 files changed, 74 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 00f34f5f2..8df6e836c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## main +- `CCHeap`: building a heap from an almost-sorted sequence - perf: `CCHeap`: building a heap from n elements is now in time O(n) instead of O(n log n) - perf: `CCHeap`: `filter` and `delete_all` are now in time O(n) diff --git a/src/core/CCHeap.ml b/src/core/CCHeap.ml index 28fc9e835..325dd09ca 100644 --- a/src/core/CCHeap.ml +++ b/src/core/CCHeap.ml @@ -150,6 +150,14 @@ module type S = sig but taking a [gen] of elements as input. @since 0.16 *) + val add_iter_almost_sorted : t -> elt iter -> t + (** [add_iter_almost_sorted h iter] is equivalent to + [merge h (of_iter_almost_sorted iter)]. + See {!of_iter_almost_sorted}. + Complexity: [O(log m + n)]. 
+ @since NEXT_RELEASE + *) + (** {2 Conversions} *) val of_list : elt list -> t @@ -175,6 +183,20 @@ module type S = sig (** [of_gen gen] is akin to {!of_list}, but taking a [gen] of elements as input. *) + val of_iter_almost_sorted : elt iter -> t + (** [of_iter_almost_sorted iter] builds a heap from the {!type:iter} sequence of elements. + Elements need not be given in any particular order. + However, the heap takes advantage of partial sorting found in the input: + the closer the input sequence is to being sorted, + the more efficient it is to convert the heap to a sorted sequence. + This enables heap-sorting that is faster than [O(n log n)] + when the input is almost sorted. + In the best case, when only a constant number of elements are misplaced, + then successive {!take} run in [O(1)], + and {!to_list_sorted} runs in [O(n)]. + Complexity: [O(n)]. + *) + val to_list : t -> elt list (** [to_list h] returns a list of the elements of the heap [h], in no particular order. @@ -366,6 +388,32 @@ module Make (E : PARTIAL_ORD) : S with type elt = E.t = struct let of_seq xs = of_iter (fun k -> Seq.iter k xs) let of_gen xs = of_iter (fun k -> _gen_iter k xs) + (* When input values are sorted in reverse order, then repeated insertions in + a leftist heap run in time O(n) and build a list-like heap where elements + are totally sorted, which makes a subsequent conversion to sorted sequence + run in O(n). *) + let _of_list_rev_sorted (xs : elt list) : t = + List.fold_left (fun h x -> N (1, x, h, E)) E xs + + (* We use this to convert an arbitrary input sequence to a heap in time O(n), + while achieving an efficient heap structure in the common situation when + the input is almost sorted. This improves heap-sorting, for instance.
*) + let of_iter_almost_sorted xs = + let sorted_chunk = ref [] in + let iter_sorted_heaps k = + xs begin fun x -> + begin match !sorted_chunk with + | (y :: _) as ys when not (E.leq y x) -> + k (_of_list_rev_sorted ys) ; + sorted_chunk := [x] + | ys -> + sorted_chunk := x :: ys + end ; + end ; + k (_of_list_rev_sorted !sorted_chunk) + in + _merge_heap_iter iter_sorted_heaps + (** {2 Adding many elements at once} *) let add_list h xs = merge h (of_list xs) @@ -373,6 +421,8 @@ module Make (E : PARTIAL_ORD) : S with type elt = E.t = struct let add_seq h xs = merge h (of_seq xs) let add_gen h xs = merge h (of_gen xs) + let add_iter_almost_sorted h xs = merge h (of_iter_almost_sorted xs) + (** {2 Conversions to sequences} *) let to_list h = diff --git a/src/core/CCHeap.mli b/src/core/CCHeap.mli index a496998a6..40c343b17 100644 --- a/src/core/CCHeap.mli +++ b/src/core/CCHeap.mli @@ -146,6 +146,14 @@ module type S = sig but taking a {!type:gen} of elements as input. @since 0.16 *) + val add_iter_almost_sorted : t -> elt iter -> t + (** [add_iter_almost_sorted h iter] is equivalent to + [merge h (of_iter_almost_sorted iter)]. + See {!of_iter_almost_sorted}. + Complexity: [O(log m + n)]. + @since NEXT_RELEASE + *) + (** {2 Conversions} *) val of_list : elt list -> t @@ -171,6 +179,21 @@ module type S = sig (** [of_gen gen] is akin to {!of_list}, but taking a {!type:gen} of elements as input. *) + val of_iter_almost_sorted : elt iter -> t + (** [of_iter_almost_sorted iter] builds a heap from the {!type:iter} sequence of elements. + Elements need not be given in any particular order. + However, the heap takes advantage of partial sorting found in the input: + the closer the input sequence is to being sorted, + the more efficient it is to convert the heap to a sorted sequence. + This enables heap-sorting that is faster than [O(n log n)] + when the input is almost sorted.
+ In the best case, when only a constant number of elements are misplaced, + then successive {!take} run in [O(1)], + and {!to_list_sorted} runs in [O(n)]. + Complexity: [O(n)]. + @since NEXT_RELEASE + *) + val to_list : t -> elt list (** [to_list h] returns a list of the elements of the heap [h], in no particular order.