diff --git a/CHANGELOG.md b/CHANGELOG.md index d4c8e84d..a9a236e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,14 @@ # Changelog +## main + +- `CCHeap`: building a heap from an almost-sorted sequence +- perf: `CCHeap`: building a heap from n elements is now in time O(n) + instead of O(n log n) +- perf: `CCHeap`: `filter` and `delete_all` are now in time O(n) + instead of O(n log n), and they ensure physical equality + (for `delete_all` this is a bugfix) + ## 3.14 diff --git a/README.md b/README.md index 072460c2..ea930024 100644 --- a/README.md +++ b/README.md @@ -539,7 +539,7 @@ val h' : IntHeap.t = val x : int = 2 # IntHeap.to_list h' (* see, 2 is removed *);; -- : int list = [4; 6; 8; 10] +- : int list = [4; 8; 10; 6] ``` ### IO helpers diff --git a/src/core/CCHeap.ml b/src/core/CCHeap.ml index dbd04824..325dd09c 100644 --- a/src/core/CCHeap.ml +++ b/src/core/CCHeap.ml @@ -7,6 +7,14 @@ type 'a gen = unit -> 'a option type 'a printer = Format.formatter -> 'a -> unit type 'a ktree = unit -> [ `Nil | `Node of 'a * 'a ktree list ] +let[@inline] _iter_map f xs k = xs (fun x -> k (f x)) + +let rec _gen_iter k g = + begin match g () with + | None -> () + | Some x -> k x; _gen_iter k g + end + module type PARTIAL_ORD = sig type t @@ -28,125 +36,209 @@ module type S = sig type elt type t + exception Empty + + (** {2 Basing heap operations} *) + val empty : t (** Empty heap. *) val is_empty : t -> bool (** Is the heap empty? *) - exception Empty - val merge : t -> t -> t - (** Merge two heaps. *) + (** [merge h1 h2] merges the two heaps [h1] and [h2]. + If one heap is empty, the result is physically equal to the other heap. + Complexity: [O(log (m+n))] where [m] and [n] are the number of elements in each heap. + *) val insert : elt -> t -> t - (** Insert a value in the heap. *) + (** [insert x h] inserts an element [x] into the heap [h]. + Complexity: [O(log n)] where [n] is the number of elements in [h]. + *) val add : t -> elt -> t - (** Synonym to {!insert}. *) - - val filter : (elt -> bool) -> t -> t - (** Filter values, only retaining the ones that satisfy the predicate. - Linear time at least. *) + (** [add h x] is [insert x h]. *) val find_min : t -> elt option - (** Find minimal element. *) + (** [find_min h] returns the minimal element of [h], + or [None] if [h] is empty. + Complexity: [O(1)]. + *) val find_min_exn : t -> elt - (** Like {!find_min} but can fail. + (** [find_min_exn h] is akin to {!find_min}, + but it raises {!Empty} when the heap is empty. @raise Empty if the heap is empty. *) val take : t -> (t * elt) option - (** Extract and return the minimum element, and the new heap (without - this element), or [None] if the heap is empty. *) + (** [take h] returns the minimum element of [h] + and the new heap without this element, + or [None] if [h] is empty. + Complexity: [O(log n)]. + *) val take_exn : t -> t * elt - (** Like {!take}, but can fail. + (** [take_exn h] is akin to {!take}, + but it raises {!Empty} when the heap is empty. @raise Empty if the heap is empty. *) + val size : t -> int + (** [size h] is the number of elements in the heap [h]. + Complexity: [O(n)]. + *) + + (** {2 Deleting elements} *) + val delete_one : (elt -> elt -> bool) -> elt -> t -> t - (** Delete one occurrence of a value if it exist in the heap. - [delete_one eq x h], use [eq] to find one [x] in [h] and delete it. - If [h] do not contain [x] then it return [h]. + (** [delete_one eq x h] deletes an occurrence of the value [x] from the heap + [h], + if there is some. + If [h] does not contain [x], then [h] itself is returned. + Elements are identified by the equality function [eq]. + Complexity: [O(n)]. @since 2.0 *) val delete_all : (elt -> elt -> bool) -> elt -> t -> t - (** Delete all occurrences of a value in the heap. - [delete_all eq x h], use [eq] to find all [x] in [h] and delete them. - If [h] do not contain [x] then it return [h]. - The difference with {!filter} is that [delete_all] stops as soon as - it enters a subtree whose root is bigger than the element. + (** [delete_all eq x h] deletes all occurrences of the value [x] from the heap [h]. + If [h] does not contain [x], then [h] itself is returned. + Elements are identified by the equality function [eq]. + This function is more efficient than {!filter} + because it avoids considering elements greater than [x]. + Complexity: [O(n)]. @since 2.0 *) - val iter : (elt -> unit) -> t -> unit - (** Iterate on elements. *) - - val fold : ('a -> elt -> 'a) -> 'a -> t -> 'a - (** Fold on all values. *) + val filter : (elt -> bool) -> t -> t + (** [filter p h] filters the elements of [h], + only retaining those that satisfy the predicate [p]. + If no element in [h] satisfies [p], then [h] itself is returned. + Complexity: [O(n)]. + *) - val size : t -> int - (** Number of elements (linear complexity). *) + (** {2 Iterating on elements} *) - (** {2 Conversions} *) + val iter : (elt -> unit) -> t -> unit + (** [iter f h] invokes [f] on every element of the heap [h]. *) - val to_list : t -> elt list - (** Return the elements of the heap, in no particular order. *) + val fold : ('a -> elt -> 'a) -> 'a -> t -> 'a + (** [fold f acc h] folds on all elements of [h]. *) - val to_list_sorted : t -> elt list - (** Return the elements in increasing order. - @since 1.1 *) + (** {2 Adding many elements at once} *) val add_list : t -> elt list -> t - (** Add the elements of the list to the heap. An element occurring several - times will be added that many times to the heap. + (** [add_list h l] adds the elements of the list [l] into the heap [h]. + An element occurring several times will be added that many times to the heap. + Elements need not be given in any particular order. + This function is more efficient than repeated insertions. + Complexity: [O(log m + n)] + where [m] and [n] are the number of elements in [h] and [l], respectively. @since 0.16 *) - val of_list : elt list -> t - (** [of_list l] is [add_list empty l]. Complexity: [O(n log n)]. *) - val add_iter : t -> elt iter -> t - (** Like {!add_list}. + (** [add_iter h iter] is akin to {!add_list}, + but taking an [iter] of elements as input. @since 2.8 *) val add_seq : t -> elt Seq.t -> t - (** Like {!add_list}. - @since 2.8 *) + (** [add_seq h seq] is akin to {!add_list}, + but taking a [Seq.t] of elements as input. + Renamed from [add_std_seq] since 3.0. + @since 3.0 *) + + val add_gen : t -> elt gen -> t + (** [add_gen h gen] is akin to {!add_list}, + but taking a [gen] of elements as input. + @since 0.16 *) + + val add_iter_almost_sorted : t -> elt iter -> t + (** [add_iter_almost_sorted h iter] is equivalent to + [merge h (of_iter_almost_sorted iter)]. + See {!of_iter_almost_sorted}. + Complexity: [O(log m + n)]. + @since NEXT_RELEASE + *) + + (** {2 Conversions} *) + + val of_list : elt list -> t + (** [of_list l] builds a heap from the list of elements [l]. + Elements need not be given in any particular order. + This function is more efficient than repeated insertions. + It is equivalent to [add_list empty l]. + Complexity: [O(n)]. + *) val of_iter : elt iter -> t - (** Build a heap from a given [iter]. Complexity: [O(n log n)]. + (** [of_iter iter] is akin to {!of_list}, + but taking an [iter] of elements as input. @since 2.8 *) val of_seq : elt Seq.t -> t - (** Build a heap from a given [Seq.t]. Complexity: [O(n log n)]. - @since 2.8 *) + (** [of_seq seq] is akin to {!of_list}, + but taking a [Seq.t] of elements as input. + Renamed from [of_std_seq] since 3.0. + @since 3.0 *) + + val of_gen : elt gen -> t + (** [of_gen gen] is akin to {!of_list}, + but taking a [gen] of elements as input. *) + + val of_iter_almost_sorted : elt iter -> t + (** [of_iter iter] builds a heap from the {!type:iter} sequence of elements. + Elements need not be given in any particular order. + However, the heap takes advantage of partial sorting found in the input: + the closer the input sequence is to being sorted, + the more efficient it is to convert the heap to a sorted sequence. + This enables heap-sorting that is faster than [O(n log n)] + when the input is almost sorted. + In the best case, when only a constant number of elements are misplaced, + then successive {!take} run in [O(1)], + and {!to_list_sorted} runs in [O(n)]. + Complexity: [O(n)]. + *) + + val to_list : t -> elt list + (** [to_list h] returns a list of the elements of the heap [h], + in no particular order. + Complexity: [O(n)]. + *) val to_iter : t -> elt iter - (** Return a [iter] of the elements of the heap. + (** [to_iter h] is akin to {!to_list}, but returning an [iter] of elements. @since 2.8 *) val to_seq : t -> elt Seq.t - (** Return a [Seq.t] of the elements of the heap. - @since 2.8 *) + (** [to_seq h] is akin to {!to_list}, but returning a [Seq.t] of elements. + Renamed from [to_std_seq] since 3.0. + @since 3.0 *) + + val to_gen : t -> elt gen + (** [to_gen h] is akin to {!to_list}, but returning a [gen] of elements. *) + + val to_list_sorted : t -> elt list + (** [to_list_sorted h] returns the list of elements of the heap [h] + in increasing order. + Complexity: [O(n log n)]. + @since 1.1 *) val to_iter_sorted : t -> elt iter - (** Iterate on the elements, in increasing order. + (** [to_iter_sorted h] is akin to {!to_list_sorted}, + but returning an [iter] of elements. @since 2.8 *) val to_seq_sorted : t -> elt Seq.t - (** Iterate on the elements, in increasing order. - @since 2.8 *) - - val add_gen : t -> elt gen -> t - (** @since 0.16 *) - - val of_gen : elt gen -> t - (** Build a heap from a given [gen]. Complexity: [O(n log n)]. *) - - val to_gen : t -> elt gen - (** Return a [gen] of the elements of the heap. *) + (** [to_seq_sorted h] is akin to {!to_list_sorted}, + but returning a [Seq.t] of elements. + Renamed from [to_std_seq_sorted] since 3.0. + @since 3.0 *) val to_tree : t -> elt ktree - (** Return a [ktree] of the elements of the heap. *) + (** [to_tree h] returns a [ktree] of the elements of the heap [h]. + The layout is not specified. + Complexity: [O(n)]. + *) + + (** {2 Pretty-printing} *) val to_string : ?sep:string -> (elt -> string) -> t -> string (** Print the heap in a string @@ -178,6 +270,8 @@ module Make (E : PARTIAL_ORD) : S with type elt = E.t = struct exception Empty + let singleton x = N (1, x, E, E) + (* Rank of the tree *) let _rank = function | E -> 0 @@ -203,15 +297,9 @@ module Make (E : PARTIAL_ORD) : S with type elt = E.t = struct else _make_node y a2 (merge t1 b2) - let insert x h = merge (N (1, x, E, E)) h + let insert x h = merge (singleton x) h let add h x = insert x h - let rec filter p h = - match h with - | E -> E - | N (_, x, l, r) when p x -> _make_node x (filter p l) (filter p r) - | N (_, _, l, r) -> merge (filter p l) (filter p r) - let find_min_exn = function | E -> raise Empty | N (_, x, _, _) -> x @@ -228,39 +316,6 @@ module Make (E : PARTIAL_ORD) : S with type elt = E.t = struct | E -> raise Empty | N (_, x, l, r) -> merge l r, x - let delete_one eq x h = - let rec aux = function - | E -> false, E - | N (_, y, l, r) as h -> - if eq x y then - true, merge l r - else if E.leq y x then ( - let found_left, l1 = aux l in - let found, r1 = - if found_left then - true, r - else - aux r - in - if found then - true, _make_node y l1 r1 - else - false, h - ) else - false, h - in - snd (aux h) - - let rec delete_all eq x = function - | E -> E - | N (_, y, l, r) as h -> - if eq x y then - merge (delete_all eq x l) (delete_all eq x r) - else if E.leq y x then - _make_node y (delete_all eq x l) (delete_all eq x r) - else - h - let rec iter f h = match h with | E -> () @@ -281,7 +336,94 @@ module Make (E : PARTIAL_ORD) : S with type elt = E.t = struct | E -> 0 | N (_, _, l, r) -> 1 + size l + size r - (** {2 Conversions} *) + (** {2 Conversions from sequences} *) + + (* Merge an [iter] of k heaps into one. + Instead of folding [merge] in one pass (which would run in time O(k log N) + where k is the number of heaps and N is the total number of elements), it + is more efficient to merge heaps pairwise until only one remains; see e.g. + Robert Tarjan, "Data Structures and Network Algorithms", + Chapter 3.3 "Leftist heaps", 1983. + or: + Chris Okasaki, "Purely Functional Data Structures", + Chapter 3.2 "Leftist heaps", Exercise 3.3, 1998 + This is independent of the representation of heaps, and, as long as merging + is in time O(log n), this runs in time O(k + k*log(N/k)). Notice that this + is a O(k + N) (if k is small wrt. N, this last upper bound is very loose). + The code below uses additional space of only O(log(k)) at any moment; + it avoids storing an intermediate list of length O(k). + When at most one of the input heaps is non-empty, the result is physically + equal to it. *) + let _merge_heap_iter (hs : t iter) : t = + let rec cons_and_merge h0 hs weights = + begin match hs with + | h1 :: hs' when weights land 1 = 0 -> + cons_and_merge (merge h0 h1) hs' (weights lsr 1) + | _ -> + h0 :: hs + end + in + (* the i-th heap in this list is a merger of 2^{w_i} input heaps, each + having gone through w_i merge operations, where the "weights" 2^{w_i} are + strictly increasing wrt. i: *) + let mergers = ref [] in + (* The w_i are the 1-bits in the binary writing of [count], the number of + input heaps merged so far; adding a heap to the mergers works like binary + incrementation: *) + let count = ref 0 in + hs begin fun h -> + incr count ; + mergers := cons_and_merge h !mergers !count ; + end ; + List.fold_left merge E !mergers + + (* To build a heap with n given values, instead of repeated insertions, + it is more efficient to do pairwise merging, running in time O(n). *) + let of_iter xs = + xs + |> _iter_map singleton + |> _merge_heap_iter + + let of_list xs = of_iter (fun k -> List.iter k xs) + let of_seq xs = of_iter (fun k -> Seq.iter k xs) + let of_gen xs = of_iter (fun k -> _gen_iter k xs) + + (* When input values are sorted in reverse order, then repeated insertions in + a leftist heap run in time O(n) and build a list-like heap where elements + are totally sorted, which makes a subsequent conversion to sorted sequence + run in O(n). *) + let _of_list_rev_sorted (xs : elt list) : t = + List.fold_left (fun h x -> N (1, x, h, E)) E xs + + (* We use this to convert an arbitrary input sequence to a heap in time O(n), + while achieving an efficient heap structure in the common situation when + the input is almost sorted. This improves heap-sorting, for instance. *) + let of_iter_almost_sorted xs = + let sorted_chunk = ref [] in + let iter_sorted_heaps k = + xs begin fun x -> + begin match !sorted_chunk with + | (y :: _) as ys when not (E.leq y x) -> + k (_of_list_rev_sorted ys) ; + sorted_chunk := [x] + | ys -> + sorted_chunk := x :: ys + end ; + end ; + k (_of_list_rev_sorted !sorted_chunk) + in + _merge_heap_iter iter_sorted_heaps + + (** {2 Adding many elements at once} *) + + let add_list h xs = merge h (of_list xs) + let add_iter h xs = merge h (of_iter xs) + let add_seq h xs = merge h (of_seq xs) + let add_gen h xs = merge h (of_gen xs) + + let add_iter_almost_sorted h xs = merge h (of_iter_almost_sorted xs) + + (** {2 Conversions to sequences} *) let to_list h = let rec aux acc h = @@ -291,29 +433,6 @@ module Make (E : PARTIAL_ORD) : S with type elt = E.t = struct in aux [] h - let to_list_sorted heap = - let rec recurse acc h = - match take h with - | None -> List.rev acc - | Some (h', x) -> recurse (x :: acc) h' - in - recurse [] heap - - let add_list h l = List.fold_left add h l - let of_list l = add_list empty l - - let add_iter h i = - let h = ref h in - i (fun x -> h := insert x !h); - !h - - let add_seq h seq = - let h = ref h in - Seq.iter (fun x -> h := insert x !h) seq; - !h - - let of_iter i = add_iter empty i - let of_seq seq = add_seq empty seq let to_iter h k = iter k h let to_seq h = @@ -326,28 +445,6 @@ module Make (E : PARTIAL_ORD) : S with type elt = E.t = struct in aux [ h ] - let to_iter_sorted heap = - let rec recurse h k = - match take h with - | None -> () - | Some (h', x) -> - k x; - recurse h' k - in - fun k -> recurse heap k - - let rec to_seq_sorted h () = - match take h with - | None -> Seq.Nil - | Some (h', x) -> Seq.Cons (x, to_seq_sorted h') - - let rec add_gen h g = - match g () with - | None -> h - | Some x -> add_gen (add h x) g - - let of_gen g = add_gen empty g - let to_gen h = let stack = Stack.create () in Stack.push h stack; @@ -365,11 +462,109 @@ module Make (E : PARTIAL_ORD) : S with type elt = E.t = struct in next + let to_list_sorted heap = + let rec recurse acc h = + match take h with + | None -> List.rev acc + | Some (h', x) -> recurse (x :: acc) h' + in + recurse [] heap + + let to_iter_sorted heap = + let rec recurse h k = + match take h with + | None -> () + | Some (h', x) -> + k x; + recurse h' k + in + fun k -> recurse heap k + + let rec to_seq_sorted h () = + match take h with + | None -> Seq.Nil + | Some (h', x) -> Seq.Cons (x, to_seq_sorted h') + let rec to_tree h () = match h with | E -> `Nil | N (_, x, l, r) -> `Node (x, [ to_tree l; to_tree r ]) + (** {2 Filtering} *) + + let rec delete_one eq x0 = function + | N (_, x, l, r) as h when E.leq x x0 -> + if eq x0 x then + merge l r + else begin + let l' = delete_one eq x0 l in + if CCEqual.physical l' l then + let r' = delete_one eq x0 r in + if CCEqual.physical r' r then + h + else + _make_node x l r' + else + _make_node x l' r + end + | h -> h + + let delete_all eq x0 h = + (* Iterates [k] on sub-heaps of [h] whose merger is equal to [h] minus + the deleted elements [x0]; we do this, instead of merging the subheaps + directly, in order to ensure complexity O(n). + When no element is deleted, the iterator does nothing and the function + returns true; this makes sure that the result shares sub-heaps with the + input as much as possible, and ensures physical equality when no element + is deleted. + In [delete_all], by contrast with [filter], we can avoid considering + elements greater than [x0]. As a consequence, the complexity is more + precisely O(k + k log(n/k)), where k is the number of elements not + greater than [x0]. This is a O(n), but it is also a O(k log n), which is + much smaller than O(n) if k is asymptotically smaller than n. + *) + let rec iter_subheaps eq x0 h k = + begin match h with + | N (_, x, l, r) when E.leq x x0 -> + let keep_x = not (eq x0 x) in + let keep_l = iter_subheaps eq x0 l k in + let keep_r = iter_subheaps eq x0 r k in + if keep_x && keep_l && keep_r then + true + else begin + if keep_x then k (singleton x) ; + if keep_l then k l ; + if keep_r then k r ; + false + end + | _ -> true + end + in + _merge_heap_iter (fun k -> if iter_subheaps eq x0 h k then k h) + + let filter p h = + (* similar to [delete_all] *) + let rec iter_subheaps p k h = + begin match h with + | E -> true + | N (_, x, l, r) -> + let keep_x = p x in + let keep_l = iter_subheaps p k l in + let keep_r = iter_subheaps p k r in + if keep_x && keep_l && keep_r then + true + else begin + if keep_x then k (singleton x) ; + if keep_l then k l ; + if keep_r then k r ; + false + end + end + in + _merge_heap_iter (fun k -> if iter_subheaps p k h then k h) + + (** {2 Pretty-printing} *) + let to_string ?(sep = ",") elt_to_string h = to_list_sorted h |> List.map elt_to_string |> String.concat sep diff --git a/src/core/CCHeap.mli b/src/core/CCHeap.mli index 894a7ad2..40c343b1 100644 --- a/src/core/CCHeap.mli +++ b/src/core/CCHeap.mli @@ -33,136 +33,214 @@ module type S = sig type elt type t + exception Empty + + (** {2 Basic heap operations} *) + val empty : t (** [empty] returns the empty heap. *) val is_empty : t -> bool - (** [is_empty h] returns [true] if the heap [h] is empty. *) - - exception Empty + (** [is_empty h] returns [true] iff the heap [h] is empty. *) val merge : t -> t -> t - (** [merge h1 h2] merges the two heaps [h1] and [h2]. *) + (** [merge h1 h2] merges the two heaps [h1] and [h2]. + If one heap is empty, the result is physically equal to the other heap. + Complexity: [O(log (m+n))] where [m] and [n] are the number of elements in each heap. + *) val insert : elt -> t -> t - (** [insert x h] inserts an element [x] into the heap [h]. *) + (** [insert x h] inserts an element [x] into the heap [h]. + Complexity: [O(log n)] where [n] is the number of elements in [h]. + *) val add : t -> elt -> t - (** [add h x] inserts an element [x] into the heap [h]. *) - - val filter : (elt -> bool) -> t -> t - (** [filter p h] filters values, only retaining the ones that satisfy the predicate [p]. - Linear time at least. *) + (** [add h x] is [insert x h]. *) val find_min : t -> elt option - (** [find_min h] find the minimal element of the heap [h]. *) + (** [find_min h] returns the minimal element of [h], + or [None] if [h] is empty. + Complexity: [O(1)]. + *) val find_min_exn : t -> elt - (** [find_min_exn h] is like {!find_min} but can fail. + (** [find_min_exn h] is akin to {!find_min}, + but it raises {!Empty} when the heap is empty. @raise Empty if the heap is empty. *) val take : t -> (t * elt) option - (** [take h] extracts and returns the minimum element, and the new heap (without - this element), or [None] if the heap [h] is empty. *) + (** [take h] returns the minimum element of [h] + and the new heap without this element, + or [None] if [h] is empty. + Complexity: [O(log n)]. + *) val take_exn : t -> t * elt - (** [take_exn h] is like {!take}, but can fail. + (** [take_exn h] is akin to {!take}, + but it raises {!Empty} when the heap is empty. @raise Empty if the heap is empty. *) + val size : t -> int + (** [size h] is the number of elements in the heap [h]. + Complexity: [O(n)]. + *) + + (** {2 Deleting elements} *) + val delete_one : (elt -> elt -> bool) -> elt -> t -> t - (** [delete_one eq x h] uses [eq] to find one occurrence of a value [x] - if it exist in the heap [h], and delete it. - If [h] do not contain [x] then it return [h]. + (** [delete_one eq x h] deletes an occurrence of the value [x] from the heap [h], + if there is some. + If [h] does not contain [x], then [h] itself is returned. + Elements are identified by the equality function [eq]. + Complexity: [O(n)]. @since 2.0 *) val delete_all : (elt -> elt -> bool) -> elt -> t -> t - (** [delete_all eq x h] uses [eq] to find all [x] in [h] and delete them. - If [h] do not contain [x] then it return [h]. - The difference with {!filter} is that [delete_all] stops as soon as - it enters a subtree whose root is bigger than the element. + (** [delete_all eq x h] deletes all occurrences of the value [x] from the heap [h]. + If [h] does not contain [x], then [h] itself is returned. + Elements are identified by the equality function [eq]. + This function is more efficient than {!filter} + because it avoids considering elements greater than [x]. + Complexity: [O(n)]. @since 2.0 *) - val iter : (elt -> unit) -> t -> unit - (** [iter f h] iterates over the heap [h] invoking [f] with the current element. *) - - val fold : ('a -> elt -> 'a) -> 'a -> t -> 'a - (** [fold f acc h] folds on all values of [h]. *) + val filter : (elt -> bool) -> t -> t + (** [filter p h] filters the elements of [h], + only retaining those that satisfy the predicate [p]. + If no element in [h] satisfies [p], then [h] itself is returned. + Complexity: [O(n)]. + *) - val size : t -> int - (** [size h] is the number of elements in the heap [h]. Linear complexity. *) + (** {2 Iterating on elements} *) - (** {2 Conversions} *) + val iter : (elt -> unit) -> t -> unit + (** [iter f h] invokes [f] on every element of the heap [h]. *) - val to_list : t -> elt list - (** [to_list h] returns the elements of the heap [h], in no particular order. *) + val fold : ('a -> elt -> 'a) -> 'a -> t -> 'a + (** [fold f acc h] folds on all elements of [h]. *) - val to_list_sorted : t -> elt list - (** [to_list_sorted h] returns the elements of the heap [h] in increasing order. - @since 1.1 *) + (** {2 Adding many elements at once} *) val add_list : t -> elt list -> t (** [add_list h l] adds the elements of the list [l] into the heap [h]. An element occurring several times will be added that many times to the heap. + Elements need not be given in any particular order. + This function is more efficient than repeated insertions. + Complexity: [O(log m + n)] + where [m] and [n] are the number of elements in [h] and [l], respectively. @since 0.16 *) - val of_list : elt list -> t - (** [of_list l] is [add_list empty l]. Complexity: [O(n log n)]. *) - val add_iter : t -> elt iter -> t - (** [add_iter h iter] is like {!add_list}. + (** [add_iter h iter] is akin to {!add_list}, + but taking an {!type:iter} of elements as input. @since 2.8 *) val add_seq : t -> elt Seq.t -> t - (** [add_seq h seq] is like {!add_list}. + (** [add_seq h seq] is akin to {!add_list}, + but taking a [Seq.t] of elements as input. Renamed from [add_std_seq] since 3.0. @since 3.0 *) + val add_gen : t -> elt gen -> t + (** [add_gen h gen] is akin to {!add_list}, + but taking a {!type:gen} of elements as input. + @since 0.16 *) + + val add_iter_almost_sorted : t -> elt iter -> t + (** [add_iter_almost_sorted h iter] is equivalent to + [merge h (of_iter_almost_sorted iter)]. + See {!of_iter_almost_sorted}. + Complexity: [O(log m + n)]. + @since NEXT_RELEASE + *) + + (** {2 Conversions} *) + + val of_list : elt list -> t + (** [of_list l] builds a heap from the list of elements [l]. + Elements need not be given in any particular order. + This function is more efficient than repeated insertions. + It is equivalent to {!add_list}[ empty l]. + Complexity: [O(n)]. + *) + val of_iter : elt iter -> t - (** [of_iter iter] builds a heap from a given [iter]. Complexity: [O(n log n)]. + (** [of_iter iter] is akin to {!of_list}, + but taking an {!type:iter} of elements as input. @since 2.8 *) val of_seq : elt Seq.t -> t - (** [of_seq seq] builds a heap from a given [Seq.t]. Complexity: [O(n log n)]. - Renamed from [of_seq] since 3.0. + (** [of_seq seq] is akin to {!of_list}, + but taking a [Seq.t] of elements as input. + Renamed from [of_std_seq] since 3.0. @since 3.0 *) + val of_gen : elt gen -> t + (** [of_gen gen] is akin to {!of_list}, + but taking a {!type:gen} of elements as input. *) + + val of_iter_almost_sorted : elt iter -> t + (** [of_iter iter] builds a heap from the {!type:iter} sequence of elements. + Elements need not be given in any particular order. + However, the heap takes advantage of partial sorting found in the input: + the closer the input sequence is to being sorted, + the more efficient it is to convert the heap to a sorted sequence. + This enables heap-sorting that is faster than [O(n log n)] + when the input is almost sorted. + In the best case, when only a constant number of elements are misplaced, + then successive {!take} run in [O(1)], + and {!to_list_sorted} runs in [O(n)]. + Complexity: [O(n)]. + @since NEXT_RELEASE + *) + + val to_list : t -> elt list + (** [to_list h] returns a list of the elements of the heap [h], + in no particular order. + Complexity: [O(n)]. + *) + val to_iter : t -> elt iter - (** [to_iter h] returns a [iter] of the elements of the heap [h]. + (** [to_iter h] is akin to {!to_list}, but returning an {!type:iter} of elements. @since 2.8 *) val to_seq : t -> elt Seq.t - (** [to_seq h] returns a [Seq.t] of the elements of the heap [h]. + (** [to_seq h] is akin to {!to_list}, but returning a [Seq.t] of elements. Renamed from [to_std_seq] since 3.0. @since 3.0 *) - val to_iter_sorted : t -> elt iter - (** [to_iter_sorted h] returns a [iter] by iterating on the elements of [h], + val to_gen : t -> elt gen + (** [to_gen h] is akin to {!to_list}, but returning a {!type:gen} of elements. *) + + val to_list_sorted : t -> elt list + (** [to_list_sorted h] returns the list of elements of the heap [h] in increasing order. + Complexity: [O(n log n)]. + @since 1.1 *) + + val to_iter_sorted : t -> elt iter + (** [to_iter_sorted h] is akin to {!to_list_sorted}, + but returning an {!type:iter} of elements. @since 2.8 *) val to_seq_sorted : t -> elt Seq.t - (** [to_seq_sorted h] returns a [Seq.t] by iterating on the elements of [h], - in increasing order. + (** [to_seq_sorted h] is akin to {!to_list_sorted}, + but returning a [Seq.t] of elements. Renamed from [to_std_seq_sorted] since 3.0. @since 3.0 *) - val add_gen : t -> elt gen -> t - (** [add_gen h gen] adds the gen [gen] to the heap [h]. - @since 0.16 *) - - val of_gen : elt gen -> t - (** [of_gen gen] builds a heap from a given [gen]. Complexity: [O(n log n)]. *) - - val to_gen : t -> elt gen - (** [to_gen h] returns a [gen] of the elements of the heap [h]. *) - val to_tree : t -> elt ktree - (** [to_tree h] returns a [ktree] of the elements of the heap [h]. *) + (** [to_tree h] returns a {!type:ktree} of the elements of the heap [h]. + The layout is not specified. + Complexity: [O(n)]. + *) + + (** {2 Pretty-printing} *) val to_string : ?sep:string -> (elt -> string) -> t -> string - (** [to_string ?sep f h] prints the heap [h] in a string - using [sep] as a given separator (default ",") between each element - (converted to a string using [f]). + (** [to_string ?sep f h] prints the heap [h] to a string, + using [f] to convert elements to strings + and [sep] (default: [","]) as a separator between elements. @since 2.7 *) val pp : @@ -173,17 +251,17 @@ module type S = sig t printer (** [pp ?pp_start ?pp_stop ?pp_sep ppf h] prints [h] on [ppf]. Each element is formatted with [ppf], [pp_start] is called at the beginning, - [pp_stop] is called at the end, [pp_sep] is called between each elements. - By defaults [pp_start] and [pp_stop] does nothing and [pp_sep] defaults to - (fun out -> Format.fprintf out ",@ "). + [pp_stop] is called at the end, [pp_sep] is called between each element. + By default, [pp_start] and [pp_stop] do nothing, and [pp_sep] is + [(fun out -> Format.fprintf out ",@ ")]. Renamed from [print] since 2.0 @since 0.16 *) end module Make (E : PARTIAL_ORD) : S with type elt = E.t -(** A convenient version of [Make] that take a [TOTAL_ORD] instead of +(** A convenient version of [Make] that takes a [TOTAL_ORD] instead of a partially ordered module. - It allow to directly pass modules that implement [compare] - without implementing [leq] explicitly *) + It allows to directly pass modules that implement [compare] + without implementing [leq] explicitly. *) module Make_from_compare (E : TOTAL_ORD) : S with type elt = E.t diff --git a/tests/core/t_heap.ml b/tests/core/t_heap.ml index 6c2ee2ed..cd47f2f3 100644 --- a/tests/core/t_heap.ml +++ b/tests/core/t_heap.ml @@ -2,106 +2,239 @@ open CCHeap module T = (val Containers_testlib.make ~__FILE__ ()) include T +(* A QCheck generator for natural numbers that are not too large (larger than + * [small_nat] but smaller than [big_nat]), with a bias towards smaller numbers. + * This also happens to be what QCheck uses for picking a length for a list + * generated by [QCheck.list]. + * QCheck defines this generator under the name [nat] but does not expose it. *) +let medium_nat = + Q.make ~print:Q.Print.int ~shrink:Q.Shrink.int ~small:(fun _ -> 1) + (fun st -> + let p = Random.State.float st 1. in + if p < 0.5 then Random.State.int st 10 + else if p < 0.75 then Random.State.int st 100 + else if p < 0.95 then Random.State.int st 1_000 + else Random.State.int st 10_000 + ) + +let list_delete_first (x0 : int) (xs : int list) : int list = + let rec aux acc xs = + begin match xs with + | [] -> List.rev acc + | x :: xs' when x = x0 -> List.rev_append acc xs' + | x :: xs' -> aux (x :: acc) xs' + end + in + aux [] xs + module H = CCHeap.Make (struct type t = int - let leq x y = x <= y end) -let rec is_sorted l = - match l with - | [ _ ] | [] -> true - | x :: (y :: _ as l') -> x <= y && is_sorted l' +;; + +t ~name:"of_list, find_min_exn, take_exn" @@ fun () -> + let h = H.of_list [ 5; 4; 3; 4; 1; 42; 0 ] in + assert_equal ~printer:string_of_int 0 (H.find_min_exn h); + let h, x = H.take_exn h in + assert_equal ~printer:string_of_int 0 x; + assert_equal ~printer:string_of_int 1 (H.find_min_exn h); + let h, x = H.take_exn h in + assert_equal ~printer:string_of_int 1 x; + assert_equal ~printer:string_of_int 3 (H.find_min_exn h); + let h, x = H.take_exn h in + assert_equal ~printer:string_of_int 3 x; + assert_equal ~printer:string_of_int 4 (H.find_min_exn h); + let h, x = H.take_exn h in + assert_equal ~printer:string_of_int 4 x; + assert_equal ~printer:string_of_int 4 (H.find_min_exn h); + let h, x = H.take_exn h in + assert_equal ~printer:string_of_int 4 x; + assert_equal ~printer:string_of_int 5 (H.find_min_exn h); + let h, x = H.take_exn h in + assert_equal ~printer:string_of_int 5 x; + assert_equal ~printer:string_of_int 42 (H.find_min_exn h); + let h, x = H.take_exn h in + assert_equal ~printer:string_of_int 42 x; + assert_raises ((=) H.Empty) (fun () -> H.find_min_exn h); + assert_raises ((=) H.Empty) (fun () -> H.take_exn h); + true +;; + +q ~name:"of_list, to_list" + ~count:30 + Q.(list medium_nat) + (fun l -> + (l |> H.of_list |> H.to_list |> List.sort CCInt.compare) + = (l |> List.sort CCInt.compare)) +;; + +q ~name:"of_list, to_list_sorted" + ~count:30 + Q.(list medium_nat) + (fun l -> + (l |> H.of_list |> H.to_list_sorted) + = (l |> List.sort CCInt.compare)) +;; + +(* The remaining tests assume the correctness of + [of_list], [to_list], [to_list_sorted]. *) + +q ~name:"size" + ~count:30 + Q.(list_of_size Gen.small_nat medium_nat) + (fun l -> + (l |> H.of_list |> H.size) + = (l |> List.length)) +;; + +q ~name:"insert" + Q.(pair medium_nat (list medium_nat)) + (fun (x, l) -> + (l |> H.of_list |> H.insert x |> H.to_list_sorted) + = ((x::l) |> List.sort CCInt.compare)) +;; + +q ~name:"merge" + Q.(pair (list medium_nat) (list medium_nat)) + (fun (l1, l2) -> + (H.merge (H.of_list l1) (H.of_list l2) |> H.to_list_sorted) + = ((l1@l2) |> List.sort CCInt.compare)) +;; + +q ~name:"add_list" + Q.(pair (list medium_nat) (list medium_nat)) + (fun (l1, l2) -> + (H.add_list (H.of_list l1) l2 |> H.to_list_sorted) + = ((l1@l2) |> List.sort CCInt.compare)) +;; + +q ~name:"delete_one" + Q.(pair medium_nat (list medium_nat)) + (fun (x, l) -> + (l |> H.of_list |> H.delete_one (=) x |> H.to_list_sorted) + = (l |> list_delete_first x |> List.sort CCInt.compare)) +;; + +q ~name:"delete_all" + Q.(pair medium_nat (list medium_nat)) + (fun (x, l) -> + (l |> H.of_list |> H.delete_all (=) x |> H.to_list_sorted) + = (l |> List.filter ((<>) x) |> List.sort CCInt.compare)) +;; + +q ~name:"filter" + Q.(list medium_nat) + (fun l -> + let p = (fun x -> x mod 2 = 0) in + let l' = l |> H.of_list |> H.filter p |> H.to_list in + List.for_all p l' && List.length l' = List.length (List.filter p l)) +;; + +t ~name:"physical equality" @@ fun () -> + let h = H.of_list [ 5; 4; 3; 4; 1; 42; 0 ] in + assert_bool "physical equality of merge with left empty" + (CCEqual.physical h (H.merge H.empty h)) ; + assert_bool "physical equality of merge with right empty" + (CCEqual.physical h (H.merge h H.empty)) ; + assert_bool "physical equality of delete_one with element lesser than min" + (CCEqual.physical h (H.delete_one (=) (-999) h)) ; + assert_bool "physical equality of delete_one with element between min and max" + (CCEqual.physical h (H.delete_one (=) 2 h)) ; + assert_bool "physical equality of delete_one with element greater than max" + (CCEqual.physical h (H.delete_one (=) 999 h)) ; + assert_bool "physical equality of delete_all with element lesser than min" + (CCEqual.physical h (H.delete_all (=) (-999) h)) ; + assert_bool "physical equality of delete_all with element between min and max" + (CCEqual.physical h (H.delete_all (=) 2 h)) ; + assert_bool "physical equality of delete_all with element greater than max" + (CCEqual.physical h (H.delete_all (=) 999 h)) ; + assert_bool "physical equality of filter" + (CCEqual.physical h (H.filter (fun _ -> true) h)) ; + true +;; + +q ~name:"fold" + Q.(list_of_size Gen.small_nat medium_nat) + (fun l -> + (l |> H.of_list |> H.fold (+) 0) + = (l |> List.fold_left (+) 0)) +;; + +q ~name:"of_iter" + Q.(list_of_size Gen.small_nat medium_nat) + (fun l -> + (l |> CCList.to_iter |> H.of_iter |> H.to_list_sorted) + = (l |> List.sort CCInt.compare)) +;; -let extract_list = H.to_list_sorted;; +q ~name:"of_seq" + Q.(list_of_size Gen.small_nat medium_nat) + (fun l -> + (l |> CCList.to_seq |> H.of_seq |> H.to_list_sorted) + = (l |> List.sort CCInt.compare)) +;; -t @@ fun () -> -let h = H.of_list [ 5; 3; 4; 1; 42; 0 ] in -let h, x = H.take_exn h in -assert_equal ~printer:string_of_int 0 x; -let h, x = H.take_exn h in -assert_equal ~printer:string_of_int 1 x; -let h, x = H.take_exn h in -assert_equal ~printer:string_of_int 3 x; -let h, x = H.take_exn h in -assert_equal ~printer:string_of_int 4 x; -let h, x = H.take_exn h in -assert_equal ~printer:string_of_int 5 x; -let h, x = H.take_exn h in -assert_equal ~printer:string_of_int 42 x; -assert_raises - (function - | H.Empty -> true - | _ -> false) - (fun () -> H.take_exn h); -true +q ~name:"of_gen" + Q.(list_of_size Gen.small_nat medium_nat) + (fun l -> + (l |> CCList.to_gen |> H.of_gen |> H.to_list_sorted) + = (l |> List.sort CCInt.compare)) ;; -q ~count:30 - Q.(list_of_size Gen.(return 1_000) int) +q ~name:"to_iter" + Q.(list_of_size Gen.small_nat medium_nat) (fun l -> - (* put elements into a heap *) - let h = H.of_iter (Iter.of_list l) in - assert_equal 1_000 (H.size h); - let l' = extract_list h in - is_sorted l') + (l |> H.of_list |> H.to_iter |> CCList.of_iter |> List.sort CCInt.compare) + = (l |> List.sort CCInt.compare)) ;; -(* test filter *) -q ~count:30 - Q.(list_of_size Gen.(return 1_000) int) +q ~name:"to_seq" + Q.(list_of_size Gen.small_nat medium_nat) (fun l -> - (* put elements into a heap *) - let h = H.of_iter (Iter.of_list l) in - let h = H.filter (fun x -> x mod 2 = 0) h in - assert (H.to_iter h |> Iter.for_all (fun x -> x mod 2 = 0)); - let l' = extract_list h in - is_sorted l') + (l |> H.of_list |> H.to_seq |> CCList.of_seq |> List.sort CCInt.compare) + = (l |> List.sort CCInt.compare)) ;; -q - Q.(list_of_size Gen.(return 1_000) int) +q ~name:"to_gen" + Q.(list_of_size Gen.small_nat medium_nat) (fun l -> - (* put elements into a heap *) - let h = H.of_iter (Iter.of_list l) in - let l' = H.to_iter_sorted h |> Iter.to_list in - is_sorted l') + (l |> H.of_list |> H.to_gen |> CCList.of_gen |> List.sort CCInt.compare) + = (l |> List.sort CCInt.compare)) ;; -q - Q.(list int) +q ~name:"to_iter_sorted" + Q.(list_of_size Gen.small_nat medium_nat) (fun l -> - extract_list (H.of_list l) = extract_list (H.of_gen (CCList.to_gen l))) + (l |> H.of_list |> H.to_iter_sorted |> Iter.to_list) + = (l |> List.sort CCInt.compare)) ;; -q - Q.(list int) +q ~name:"to_seq_sorted" + Q.(list_of_size Gen.small_nat medium_nat) (fun l -> - let h = H.of_list l in - H.to_gen h |> CCList.of_gen |> List.sort Stdlib.compare - = (H.to_list h |> List.sort Stdlib.compare)) + (l |> H.of_list |> H.to_seq_sorted |> CCList.of_seq |> List.sort CCInt.compare) + = (l |> List.sort CCInt.compare)) ;; -q - Q.(list int) +q ~name:"to_string with default sep" + Q.(list_of_size Gen.small_nat medium_nat) (fun l -> - let h = H.of_list l in - H.to_string string_of_int h - = (List.sort Stdlib.compare l |> List.map string_of_int |> String.concat ",")) + (l |> H.of_list |> H.to_string string_of_int) + = (l |> List.sort CCInt.compare |> List.map string_of_int |> String.concat ",")) ;; -q - Q.(list int) +q ~name:"to_string with space as sep" + Q.(list_of_size Gen.small_nat medium_nat) (fun l -> - let h = H.of_list l in - H.to_string ~sep:" " string_of_int h - = (List.sort Stdlib.compare l |> List.map string_of_int |> String.concat " ")) + (l |> H.of_list |> H.to_string ~sep:" " string_of_int) + = (l |> List.sort CCInt.compare |> List.map string_of_int |> String.concat " ")) ;; -q - Q.(list_of_size Gen.(return 1_000) int) +q ~name:"Make_from_compare" + Q.(list_of_size Gen.small_nat medium_nat) (fun l -> let module H' = Make_from_compare (CCInt) in - let h = H'.of_list l in - let l' = H'.to_list_sorted h in - is_sorted l') + (l |> H'.of_list |> H'.to_list_sorted) + = (l |> List.sort CCInt.compare))