-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutil_list.ml
503 lines (445 loc) · 12.6 KB
/
util_list.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
(*
Various utilities for handling lists
*)
(*
Make a custom comparison function based on a mapping to integers.
This is useful when the default order assumed by
Pervasives.compare is unsatisfying (or unspecified).
Example 1: variants
let cmp =
compare_int (function
| `Small -> 0
| `Medium -> 1
| `Large -> 2
) in
List.sort cmp l
Example 2: sort by decreasing length
let cmp =
compare_int (fun x -> - (String.length x.name))
*)
let compare_int : ('a -> int) -> 'a -> 'a -> int =
fun to_int a b ->
compare (to_int a) (to_int b)
let compare_float : ('a -> float) -> 'a -> 'a -> int =
fun to_float a b ->
compare (to_float a) (to_float b)
(*
Compose a comparison function from a list of comparison functions.
Example: sort by decreasing age, then by name
let cmp =
compare_by [
compare_int (fun x -> - x.age);
String.compare;
]
*)
let rec compare_by cmp_list a b =
match cmp_list with
| [] -> 0
| cmp :: fallback_cmp ->
let c = cmp a b in
if c <> 0 then c
else
compare_by fallback_cmp a b
let sort_full ?(compare = compare) get_key l =
let kv_list = List.rev_map (fun v -> (get_key v, v)) l in
let kv_list = List.sort (fun (k1, v1) (k2, v2) -> compare k2 k1) kv_list in
List.rev_map snd kv_list
let test_sort_full () =
sort_full String.lowercase_ascii ["C"; "A"; "b"] = ["A"; "b"; "C"]
(*
Remove from the first all duplicate elements based on their keys
as well as elements whose keys appear in the second list,
proceeding from left to right.
*)
let diff_full get_key1 get_key2 l1 l2 =
let tbl = Hashtbl.create (2 * List.length l1) in
List.iter (fun x ->
let k = get_key2 x in
Hashtbl.replace tbl k ()
) l2;
let r =
List.fold_left (fun acc x ->
let k = get_key1 x in
if Hashtbl.mem tbl k then acc
else (
Hashtbl.add tbl k ();
x :: acc
)
) [] l1
in
List.rev r
(* diff [1;2;3;4] [9;3;5;1] = [2;4] *)
let diff l1 l2 =
diff_full (fun x -> x) (fun x -> x) l1 l2
(* Remove duplicate elements,
proceeding from left to right unlike BatList.unique *)
let unique_full get_key l =
diff_full get_key get_key l []
let unique l =
unique_full (fun x -> x) l
let test_unique () =
let input = [3;2;5;1;2;3;8;4;8;2] in
let expected_output = [3;2;5;1;8;4] in
unique input = expected_output
(*
Remove duplicates from the union of the lists, proceeding
from left to right:
union_full (fun x -> x) [1; 2; 3] [4; 3; 5] = [1; 2; 3; 4; 5]
*)
let union_full get_key l1 l2 = unique_full get_key (l1 @ l2)
let union l1 l2 = union_full (fun x -> x) l1 l2
(*
Return a list of elements from the first list whose keys
exist in the second list as well.
Duplicates are removed.
The result is the first list from which elements have been removed,
i.e. the order of the first list is preserved.
*)
let inter_full get_key1 get_key2 l1 l2 =
let tbl = Hashtbl.create (2 * List.length l2) in
List.iter (fun x ->
let k = get_key2 x in
Hashtbl.replace tbl k x
) l2;
List.filter (fun x ->
let k = get_key1 x in
let b = Hashtbl.mem tbl k in
if b then
Hashtbl.remove tbl k;
b
) l1
let inter l1 l2 =
let l1, l2 =
(* optimize by building a hash table from the shortest list *)
if List.length l1 <= List.length l2 then
l1, l2
else
l2, l1
in
inter_full
(fun x -> x)
(fun x -> x)
l1 l2
let test_inter () =
let result = inter [1;2;3;4;3] [5;4;6;1;4;3] in
let expected = [1;3;4] in
List.sort compare result = List.sort compare expected
(*
Sort a list of values according to some order specified by a list of
keys.
Values whose key doesn't appear in the list of keys are moved to the end.
*)
let reorder get_key keys values =
let tbl = Hashtbl.create (2 * List.length keys) in
BatList.iteri (fun i k ->
if not (Hashtbl.mem tbl k) then
Hashtbl.add tbl k i
) keys;
let default = List.length keys in
let l =
BatList.map (fun v ->
try (Hashtbl.find tbl (get_key v), v)
with Not_found -> (default, v)
) values
in
let l = BatList.stable_sort (fun (i, _) (j, _) -> compare i j) l in
BatList.map snd l
let test_reorder () =
let f keys values = reorder (fun k -> k) keys values in
(match f [4;2;6;3] [1;2;3;4;5;6;7] with
| 4 :: 2 :: 6 :: 3 :: tail ->
assert (List.sort compare tail = [1; 5; 7])
| _ ->
assert false
);
(match f [6;4;2;6;3] [1;1;2;3;4;5;6;7] with
| 6 :: 6 :: 4 :: 2 :: 3 :: tail ->
assert (List.sort compare tail = [1; 1; 5; 7])
| _ ->
assert false
);
true
let group_by_key pair_list =
let tbl = Hashtbl.create (List.length pair_list) in
List.iter (fun (k, v) ->
let r =
try Hashtbl.find tbl k
with Not_found ->
let r = ref [] in
Hashtbl.add tbl k r;
r
in
r := v :: !r
) pair_list;
Hashtbl.fold (fun k r acc -> (k, List.rev !r) :: acc) tbl []
let test_group_by_key () =
List.sort compare (group_by_key [1,2;
2,4;
1,5;
3,6;
3,7])
= List.sort compare [1, [2; 5];
2, [4];
3, [6; 7]]
(*
Group elements with an identical key,
which must be hashable and comparable (usable as key with Hashtbl).
The `get_representative` function must return a key and a value
that is usable as the representative of the cluster.
*)
let group_by get_representative l =
let pair_list =
List.rev_map (fun x ->
let k, v = get_representative x in
(k, (v, x))
) l
in
let groups = group_by_key pair_list in
BatList.map (fun (k, l) ->
match l with
| [] -> assert false
| (v, x) :: _ -> (v, BatList.map snd l)
) groups
let test_group_by () =
let get_representative (k, v) = (k, 10 + v) in
let l =
group_by get_representative
[1,2;
1,4]
in
match l with
| [ (12|14), ([1,2; 1,4]|[1,4; 1,2]) ] -> true
| _ -> false
(*
Split list into uniques and the rest.
*)
let split_unique_full get_key l =
let tbl = Hashtbl.create (2 * List.length l) in
BatList.partition (fun x ->
let k = get_key x in
if Hashtbl.mem tbl k then false
else (
Hashtbl.add tbl k ();
true
)
) l
let unique_first_full get_key l =
let uniques, other = split_unique_full get_key l in
uniques @ other
let unique_first l =
unique_first_full (fun x -> x) l
let test_unique_first () =
let f l = unique_first_full floor l in
assert (f [] = []);
assert (f [123.] = [123.]);
assert (f [1.1; 1.; 2.] = [1.1; 2.; 1.]);
assert (f [5.; 5.1; 2.; 3.; 3.1; 2.1; 4.] = [5.; 2.; 3.; 4.; 5.1; 3.1; 2.1]);
true
(*
Find the minimum of a non-empty list according to the given
comparison function `cmp` (e.g. Pervasives.compare).
`list_first cmp l` is equivalent to `List.hd (List.stable_sort cmp l)`
but costs only O(length l).
*)
let get_first cmp l =
match l with
| [] -> assert false
| first :: tail ->
List.fold_left (fun acc x ->
if cmp acc x <= 0 then acc
else x
) first tail
let test_get_first () =
assert (get_first compare [3;2;1;5;4] = 1);
assert (get_first (fun a b -> compare b a) [3;2;1;5;4] = 5);
true
(*
Get an element with the most common property specified as the key k
in each pair (k, v). A sample value is returned with the key.
*)
let get_majority pair_list =
let clusters = group_by_key pair_list in
let counts =
BatList.map (fun (k, vl) ->
match vl with
| [] -> assert false
| first :: _ -> (List.length vl, (k, first))
) clusters
in
match clusters with
| [] ->
invalid_arg "Util_list.get_majority"
| l ->
snd (get_first (fun (n1, _) (n2, _) -> compare n2 n1) counts)
let test_get_majority () =
let k, v = get_majority [ 1, "a";
2, "b";
2, "c";
2, "d";
3, "e";
3, "f" ]
in
assert (k = 2);
assert (
match v with
| "b" | "c" | "d" -> true
| _ -> false
);
true
(*
If possible, get a pair (key, value) whose key matches Some _
and is the most frequent in the input list, otherwise return
a pair whose key is None.
*)
let get_opt_majority pair_list =
let nones, somes = BatList.partition (fun (o, v) -> o = None) pair_list in
match somes with
| [] -> get_majority nones
| _ -> get_majority somes
let test_get_opt_majority () =
let k, v = get_opt_majority [ Some 1, "a";
None, "b";
None, "c";
None, "d";
Some 3, "e";
Some 3, "f";
Some 4, "g" ]
in
assert (k = Some 3);
assert (
match v with
| "e" | "f" -> true
| _ -> false
);
let k, v = get_opt_majority [ None, "a"; None, "b" ] in
assert (k = None);
true
(*
Put a list of items into a hash table, removing duplicates.
*)
let make_kv_table l get_kv =
let tbl = Hashtbl.create (List.length l) in
List.iter (fun x ->
let k, v = get_kv x in
Hashtbl.replace tbl k v
) l;
tbl
(* Simpler interface to `make_kv_table` *)
let to_kv_table pairs =
make_kv_table pairs (fun x -> x)
(* Simpler interface to `make_kv_table` *)
let to_table l get_key =
make_kv_table l (fun x -> (get_key x, x))
(* Simpler interface to `to_kv_table`, storing no value.
Meant to be used with `Hashtbl.mem` for efficient existence. *)
let to_mem_table l =
make_kv_table l (fun k -> (k, ()))
let optimum l prefer_right_arg =
match l with
| [] -> invalid_arg "Util_list.optimum"
| first :: rest ->
List.fold_left (fun acc x ->
if prefer_right_arg acc x then x
else acc
) first rest
let optional f l g =
match l with
| [] -> None
| l -> Some (f l g)
(* Find the maximum element of a non-empty list,
preferring the leftmost occurrence. *)
let maximum l cmp =
optimum l (fun a b -> cmp a b < 0)
(* Same as `maximum`, returning None if the list is empty. *)
let opt_maximum l cmp =
optional maximum l cmp
(* Find the minimum element of a non-empty list,
preferring the leftmost occurrence. *)
let minimum l cmp =
optimum l (fun a b -> cmp a b > 0)
(* Same as `minimum`, returning None if the list is empty. *)
let opt_minimum l cmp =
optional minimum l cmp
let test_optimum () =
assert (
maximum [0.; 1.1; 1.2; 0.] (fun a b -> compare (truncate a) (truncate b))
= 1.1
);
assert (
minimum [1.; 0.2; 0.1; 1.] (fun a b -> compare (truncate a) (truncate b))
= 0.2
);
true
(*
Return true if a predicate matches at least n elements of the list.
`List.exists f lst` is equivalent to `List.exists_n f 1 lst`.
*)
let rec exists_n n l f =
if n <= 0 then
true
else
match l with
| [] ->
false
| x :: l ->
let n =
if f x then (n-1)
else n
in
exists_n n l f
let test_exists_n () =
let f x = x = 0 in
assert (exists_n 3 [1;2;3;0;1;0;1;0;0] f);
assert (exists_n 3 [0;0;0] f);
assert (not (exists_n 1 [] f));
assert (not (exists_n 1 [5] f));
assert (exists_n 0 [5] f);
assert (exists_n 0 [] f);
true
let shuffle l =
let a = Array.of_list l in
let len = Array.length a in
for i = 0 to len - 2 do
let j = len - Random.int (len - i) - 1 in
let x = a.(i) in
a.(i) <- a.(j);
a.(j) <- x;
done;
Array.to_list a
let test_shuffle () =
assert (shuffle [] = []);
assert (shuffle [0] = [0]);
assert (shuffle [0;0;0] = [0;0;0]);
let l = [1; 2; 3] in
let rev = List.rev l in
let ll = Array.to_list (Array.init 100 (fun _ -> shuffle l)) in
assert (List.exists (fun x -> x = rev) ll);
true
(*
Common functions with arguments in a better order,
and which won't blow the stack
*)
let iter l f = BatList.iter f l
let map l f = BatList.map f l
let fold_left acc l f = BatList.fold_left f acc l
let fold_right l acc f = BatList.fold_right f l acc
let filter l f = BatList.filter f l
let filter_map l f = BatList.filter_map f l
let for_all l f = BatList.for_all f l
let exists l f = BatList.exists f l
let find l f = BatList.find f l
let tests = [
"unique first", test_unique_first;
"get_first", test_get_first;
"get_majority", test_get_majority;
"get_opt_majority", test_get_opt_majority;
"sort", test_sort_full;
"unique", test_unique;
"inter", test_inter;
"reorder", test_reorder;
"group by key", test_group_by_key;
"group by", test_group_by;
"optimum", test_optimum;
"exists_n", test_exists_n;
"shuffle", test_shuffle;
]