-
-
Notifications
You must be signed in to change notification settings - Fork 2k
/
walk.rs
1565 lines (1440 loc) · 51.3 KB
/
walk.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
use std::cmp;
use std::ffi::{OsStr, OsString};
use std::fmt;
use std::fs::{self, FileType, Metadata};
use std::io;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::thread;
use std::time::Duration;
use std::vec;
use crossbeam::sync::MsQueue;
use walkdir::{self, WalkDir, WalkDirIterator, is_same_file};
use dir::{Ignore, IgnoreBuilder};
use gitignore::GitignoreBuilder;
use overrides::Override;
use types::Types;
use {Error, PartialErrorBuilder};
/// A directory entry with a possible error attached.
///
/// The error typically refers to a problem parsing ignore files in a
/// particular directory.
#[derive(Debug)]
pub struct DirEntry {
dent: DirEntryInner,
err: Option<Error>,
}
impl DirEntry {
/// The full path that this entry represents.
pub fn path(&self) -> &Path {
self.dent.path()
}
/// Whether this entry corresponds to a symbolic link or not.
pub fn path_is_symbolic_link(&self) -> bool {
self.dent.path_is_symbolic_link()
}
/// Returns true if and only if this entry corresponds to stdin.
///
/// i.e., The entry has depth 0 and its file name is `-`.
pub fn is_stdin(&self) -> bool {
self.dent.is_stdin()
}
/// Return the metadata for the file that this entry points to.
pub fn metadata(&self) -> Result<Metadata, Error> {
self.dent.metadata()
}
/// Return the file type for the file that this entry points to.
///
/// This entry doesn't have a file type if it corresponds to stdin.
pub fn file_type(&self) -> Option<FileType> {
self.dent.file_type()
}
/// Return the file name of this entry.
///
/// If this entry has no file name (e.g., `/`), then the full path is
/// returned.
pub fn file_name(&self) -> &OsStr {
self.dent.file_name()
}
/// Returns the depth at which this entry was created relative to the root.
pub fn depth(&self) -> usize {
self.dent.depth()
}
/// Returns the underlying inode number if one exists.
///
/// If this entry doesn't have an inode number, then `None` is returned.
#[cfg(unix)]
pub fn ino(&self) -> Option<u64> {
self.dent.ino()
}
/// Returns an error, if one exists, associated with processing this entry.
///
/// An example of an error is one that occurred while parsing an ignore
/// file.
pub fn error(&self) -> Option<&Error> {
self.err.as_ref()
}
fn new_stdin() -> DirEntry {
DirEntry {
dent: DirEntryInner::Stdin,
err: None,
}
}
fn new_walkdir(dent: walkdir::DirEntry, err: Option<Error>) -> DirEntry {
DirEntry {
dent: DirEntryInner::Walkdir(dent),
err: err,
}
}
fn new_raw(dent: DirEntryRaw, err: Option<Error>) -> DirEntry {
DirEntry {
dent: DirEntryInner::Raw(dent),
err: err,
}
}
}
/// DirEntryInner is the implementation of DirEntry.
///
/// It specifically represents three distinct sources of directory entries:
///
/// 1. From the walkdir crate.
/// 2. Special entries that represent things like stdin.
/// 3. From a path.
///
/// Specifically, (3) has to essentially re-create the DirEntry implementation
/// from WalkDir.
#[derive(Debug)]
enum DirEntryInner {
Stdin,
Walkdir(walkdir::DirEntry),
Raw(DirEntryRaw),
}
impl DirEntryInner {
fn path(&self) -> &Path {
use self::DirEntryInner::*;
match *self {
Stdin => Path::new("<stdin>"),
Walkdir(ref x) => x.path(),
Raw(ref x) => x.path(),
}
}
fn path_is_symbolic_link(&self) -> bool {
use self::DirEntryInner::*;
match *self {
Stdin => false,
Walkdir(ref x) => x.path_is_symbolic_link(),
Raw(ref x) => x.path_is_symbolic_link(),
}
}
fn is_stdin(&self) -> bool {
match *self {
DirEntryInner::Stdin => true,
_ => false,
}
}
fn metadata(&self) -> Result<Metadata, Error> {
use self::DirEntryInner::*;
match *self {
Stdin => {
let err = Error::Io(io::Error::new(
io::ErrorKind::Other, "<stdin> has no metadata"));
Err(err.with_path("<stdin>"))
}
Walkdir(ref x) => {
x.metadata().map_err(|err| {
Error::Io(io::Error::from(err)).with_path(x.path())
})
}
Raw(ref x) => x.metadata(),
}
}
fn file_type(&self) -> Option<FileType> {
use self::DirEntryInner::*;
match *self {
Stdin => None,
Walkdir(ref x) => Some(x.file_type()),
Raw(ref x) => Some(x.file_type()),
}
}
fn file_name(&self) -> &OsStr {
use self::DirEntryInner::*;
match *self {
Stdin => OsStr::new("<stdin>"),
Walkdir(ref x) => x.file_name(),
Raw(ref x) => x.file_name(),
}
}
fn depth(&self) -> usize {
use self::DirEntryInner::*;
match *self {
Stdin => 0,
Walkdir(ref x) => x.depth(),
Raw(ref x) => x.depth(),
}
}
#[cfg(unix)]
fn ino(&self) -> Option<u64> {
use self::DirEntryInner::*;
match *self {
Stdin => None,
Walkdir(ref x) => Some(x.ino()),
Raw(ref x) => Some(x.ino()),
}
}
}
/// DirEntryRaw is essentially copied from the walkdir crate so that we can
/// build `DirEntry`s from whole cloth in the parallel iterator.
struct DirEntryRaw {
/// The path as reported by the `fs::ReadDir` iterator (even if it's a
/// symbolic link).
path: PathBuf,
/// The file type. Necessary for recursive iteration, so store it.
ty: FileType,
/// Is set when this entry was created from a symbolic link and the user
/// expects the iterator to follow symbolic links.
follow_link: bool,
/// The depth at which this entry was generated relative to the root.
depth: usize,
/// The underlying inode number (Unix only).
#[cfg(unix)]
ino: u64,
}
impl fmt::Debug for DirEntryRaw {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
// Leaving out FileType because it doesn't have a debug impl
// in Rust 1.9. We could add it if we really wanted to by manually
// querying each possibly file type. Meh. ---AG
f.debug_struct("DirEntryRaw")
.field("path", &self.path)
.field("follow_link", &self.follow_link)
.field("depth", &self.depth)
.finish()
}
}
impl DirEntryRaw {
fn path(&self) -> &Path {
&self.path
}
fn path_is_symbolic_link(&self) -> bool {
self.ty.is_symlink() || self.follow_link
}
fn metadata(&self) -> Result<Metadata, Error> {
if self.follow_link {
fs::metadata(&self.path)
} else {
fs::symlink_metadata(&self.path)
}.map_err(|err| Error::Io(io::Error::from(err)).with_path(&self.path))
}
fn file_type(&self) -> FileType {
self.ty
}
fn file_name(&self) -> &OsStr {
self.path.file_name().unwrap_or_else(|| self.path.as_os_str())
}
fn depth(&self) -> usize {
self.depth
}
#[cfg(unix)]
fn ino(&self) -> u64 {
self.ino
}
fn from_entry(
depth: usize,
ent: &fs::DirEntry,
) -> Result<DirEntryRaw, Error> {
let ty = try!(ent.file_type().map_err(|err| {
let err = Error::Io(io::Error::from(err)).with_path(ent.path());
Error::WithDepth {
depth: depth,
err: Box::new(err),
}
}));
Ok(DirEntryRaw::from_entry_os(depth, ent, ty))
}
#[cfg(not(unix))]
fn from_entry_os(
depth: usize,
ent: &fs::DirEntry,
ty: fs::FileType,
) -> DirEntryRaw {
DirEntryRaw {
path: ent.path(),
ty: ty,
follow_link: false,
depth: depth,
}
}
#[cfg(unix)]
fn from_entry_os(
depth: usize,
ent: &fs::DirEntry,
ty: fs::FileType,
) -> DirEntryRaw {
use std::os::unix::fs::DirEntryExt;
DirEntryRaw {
path: ent.path(),
ty: ty,
follow_link: false,
depth: depth,
ino: ent.ino(),
}
}
#[cfg(not(unix))]
fn from_link(depth: usize, pb: PathBuf) -> Result<DirEntryRaw, Error> {
let md = try!(fs::metadata(&pb).map_err(|err| {
Error::Io(err).with_path(&pb)
}));
Ok(DirEntryRaw {
path: pb,
ty: md.file_type(),
follow_link: true,
depth: depth,
})
}
#[cfg(unix)]
fn from_link(depth: usize, pb: PathBuf) -> Result<DirEntryRaw, Error> {
use std::os::unix::fs::MetadataExt;
let md = try!(fs::metadata(&pb).map_err(|err| {
Error::Io(err).with_path(&pb)
}));
Ok(DirEntryRaw {
path: pb,
ty: md.file_type(),
follow_link: true,
depth: depth,
ino: md.ino(),
})
}
}
/// WalkBuilder builds a recursive directory iterator.
///
/// The builder supports a large number of configurable options. This includes
/// specific glob overrides, file type matching, toggling whether hidden
/// files are ignored or not, and of course, support for respecting gitignore
/// files.
///
/// By default, all ignore files found are respected. This includes `.ignore`,
/// `.gitignore`, `.git/info/exclude` and even your global gitignore
/// globs, usually found in `$XDG_CONFIG_HOME/git/ignore`.
///
/// Some standard recursive directory options are also supported, such as
/// limiting the recursive depth or whether to follow symbolic links (disabled
/// by default).
///
/// # Ignore rules
///
/// There are many rules that influence whether a particular file or directory
/// is skipped by this iterator. Those rules are documented here. Note that
/// the rules assume a default configuration.
///
/// * First, glob overrides are checked. If a path matches a glob override,
/// then matching stops. The path is then only skipped if the glob that matched
/// the path is an ignore glob. (An override glob is a whitelist glob unless it
/// starts with a `!`, in which case it is an ignore glob.)
/// * Second, ignore files are checked. Ignore files currently only come from
/// git ignore files (`.gitignore`, `.git/info/exclude` and the configured
/// global gitignore file), plain `.ignore` files, which have the same format
/// as gitignore files, or explicitly added ignore files. The precedence order
/// is: `.ignore`, `.gitignore`, `.git/info/exclude`, global gitignore and
/// finally explicitly added ignore files. Note that precedence between
/// different types of ignore files is not impacted by the directory hierarchy;
/// any `.ignore` file overrides all `.gitignore` files. Within each
/// precedence level, more nested ignore files have a higher precedence over
/// less nested ignore files.
/// * Third, if the previous step yields an ignore match, than all matching
/// is stopped and the path is skipped.. If it yields a whitelist match, then
/// process continues. A whitelist match can be overridden by a later matcher.
/// * Fourth, unless the path is a directory, the file type matcher is run on
/// the path. As above, if it's an ignore match, then all matching is stopped
/// and the path is skipped. If it's a whitelist match, then matching
/// continues.
/// * Fifth, if the path hasn't been whitelisted and it is hidden, then the
/// path is skipped.
/// * Sixth, unless the path is a directory, the size of the file is compared
/// against the max filesize limit. If it exceeds the limit, it is skipped.
/// * Seventh, if the path has made it this far then it is yielded in the
/// iterator.
#[derive(Clone)]
pub struct WalkBuilder {
paths: Vec<PathBuf>,
ig_builder: IgnoreBuilder,
parents: bool,
max_depth: Option<usize>,
max_filesize: Option<u64>,
follow_links: bool,
sorter: Option<Arc<Fn(&OsString, &OsString) -> cmp::Ordering + 'static>>,
threads: usize,
}
impl fmt::Debug for WalkBuilder {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.debug_struct("WalkBuilder")
.field("paths", &self.paths)
.field("ig_builder", &self.ig_builder)
.field("parents", &self.parents)
.field("max_depth", &self.max_depth)
.field("max_filesize", &self.max_filesize)
.field("follow_links", &self.follow_links)
.field("threads", &self.threads)
.finish()
}
}
impl WalkBuilder {
/// Create a new builder for a recursive directory iterator for the
/// directory given.
///
/// Note that if you want to traverse multiple different directories, it
/// is better to call `add` on this builder than to create multiple
/// `Walk` values.
pub fn new<P: AsRef<Path>>(path: P) -> WalkBuilder {
WalkBuilder {
paths: vec![path.as_ref().to_path_buf()],
ig_builder: IgnoreBuilder::new(),
parents: true,
max_depth: None,
max_filesize: None,
follow_links: false,
sorter: None,
threads: 0,
}
}
/// Build a new `Walk` iterator.
pub fn build(&self) -> Walk {
let follow_links = self.follow_links;
let max_depth = self.max_depth;
let cmp = self.sorter.clone();
let its = self.paths.iter().map(move |p| {
if p == Path::new("-") {
(p.to_path_buf(), None)
} else {
let mut wd = WalkDir::new(p);
wd = wd.follow_links(follow_links || p.is_file());
if let Some(max_depth) = max_depth {
wd = wd.max_depth(max_depth);
}
if let Some(ref cmp) = cmp {
let cmp = cmp.clone();
wd = wd.sort_by(move |a, b| cmp(a, b));
}
(p.to_path_buf(), Some(WalkEventIter::from(wd)))
}
}).collect::<Vec<_>>().into_iter();
let ig_root = self.ig_builder.build();
Walk {
its: its,
it: None,
ig_root: ig_root.clone(),
ig: ig_root.clone(),
max_filesize: self.max_filesize,
parents: self.parents,
}
}
/// Build a new `WalkParallel` iterator.
///
/// Note that this *doesn't* return something that implements `Iterator`.
/// Instead, the returned value must be run with a closure. e.g.,
/// `builder.build_parallel().run(|| |path| println!("{:?}", path))`.
pub fn build_parallel(&self) -> WalkParallel {
WalkParallel {
paths: self.paths.clone().into_iter(),
ig_root: self.ig_builder.build(),
max_depth: self.max_depth,
max_filesize: self.max_filesize,
follow_links: self.follow_links,
parents: self.parents,
threads: self.threads,
}
}
/// Add a file path to the iterator.
///
/// Each additional file path added is traversed recursively. This should
/// be preferred over building multiple `Walk` iterators since this
/// enables reusing resources across iteration.
pub fn add<P: AsRef<Path>>(&mut self, path: P) -> &mut WalkBuilder {
self.paths.push(path.as_ref().to_path_buf());
self
}
/// The maximum depth to recurse.
///
/// The default, `None`, imposes no depth restriction.
pub fn max_depth(&mut self, depth: Option<usize>) -> &mut WalkBuilder {
self.max_depth = depth;
self
}
/// Whether to follow symbolic links or not.
pub fn follow_links(&mut self, yes: bool) -> &mut WalkBuilder {
self.follow_links = yes;
self
}
/// Whether to ignore files above the specified limit.
pub fn max_filesize(&mut self, filesize: Option<u64>) -> &mut WalkBuilder {
self.max_filesize = filesize;
self
}
/// The number of threads to use for traversal.
///
/// Note that this only has an effect when using `build_parallel`.
///
/// The default setting is `0`, which chooses the number of threads
/// automatically using heuristics.
pub fn threads(&mut self, n: usize) -> &mut WalkBuilder {
self.threads = n;
self
}
/// Add an ignore file to the matcher.
///
/// This has lower precedence than all other sources of ignore rules.
///
/// If there was a problem adding the ignore file, then an error is
/// returned. Note that the error may indicate *partial* failure. For
/// example, if an ignore file contains an invalid glob, all other globs
/// are still applied.
pub fn add_ignore<P: AsRef<Path>>(&mut self, path: P) -> Option<Error> {
let mut builder = GitignoreBuilder::new("");
let mut errs = PartialErrorBuilder::default();
errs.maybe_push(builder.add(path));
match builder.build() {
Ok(gi) => { self.ig_builder.add_ignore(gi); }
Err(err) => { errs.push(err); }
}
errs.into_error_option()
}
/// Add an override matcher.
///
/// By default, no override matcher is used.
///
/// This overrides any previous setting.
pub fn overrides(&mut self, overrides: Override) -> &mut WalkBuilder {
self.ig_builder.overrides(overrides);
self
}
/// Add a file type matcher.
///
/// By default, no file type matcher is used.
///
/// This overrides any previous setting.
pub fn types(&mut self, types: Types) -> &mut WalkBuilder {
self.ig_builder.types(types);
self
}
/// Enables ignoring hidden files.
///
/// This is enabled by default.
pub fn hidden(&mut self, yes: bool) -> &mut WalkBuilder {
self.ig_builder.hidden(yes);
self
}
/// Enables reading ignore files from parent directories.
///
/// If this is enabled, then the parent directories of each file path given
/// are traversed for ignore files (subject to the ignore settings on
/// this builder). Note that file paths are canonicalized with respect to
/// the current working directory in order to determine parent directories.
///
/// This is enabled by default.
pub fn parents(&mut self, yes: bool) -> &mut WalkBuilder {
self.parents = yes;
self
}
/// Enables reading `.ignore` files.
///
/// `.ignore` files have the same semantics as `gitignore` files and are
/// supported by search tools such as ripgrep and The Silver Searcher.
///
/// This is enabled by default.
pub fn ignore(&mut self, yes: bool) -> &mut WalkBuilder {
self.ig_builder.ignore(yes);
self
}
/// Enables reading a global gitignore file, whose path is specified in
/// git's `core.excludesFile` config option.
///
/// Git's config file location is `$HOME/.gitconfig`. If `$HOME/.gitconfig`
/// does not exist or does not specify `core.excludesFile`, then
/// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not
/// set or is empty, then `$HOME/.config/git/ignore` is used instead.
pub fn git_global(&mut self, yes: bool) -> &mut WalkBuilder {
self.ig_builder.git_global(yes);
self
}
/// Enables reading `.gitignore` files.
///
/// `.gitignore` files have match semantics as described in the `gitignore`
/// man page.
///
/// This is enabled by default.
pub fn git_ignore(&mut self, yes: bool) -> &mut WalkBuilder {
self.ig_builder.git_ignore(yes);
self
}
/// Enables reading `.git/info/exclude` files.
///
/// `.git/info/exclude` files have match semantics as described in the
/// `gitignore` man page.
///
/// This is enabled by default.
pub fn git_exclude(&mut self, yes: bool) -> &mut WalkBuilder {
self.ig_builder.git_exclude(yes);
self
}
/// Set a function for sorting directory entries.
///
/// If a compare function is set, the resulting iterator will return all
/// paths in sorted order. The compare function will be called to compare
/// names from entries from the same directory using only the name of the
/// entry.
///
/// Note that this is not used in the parallel iterator.
pub fn sort_by<F>(&mut self, cmp: F) -> &mut WalkBuilder
where F: Fn(&OsString, &OsString) -> cmp::Ordering + 'static {
self.sorter = Some(Arc::new(cmp));
self
}
}
/// Walk is a recursive directory iterator over file paths in one or more
/// directories.
///
/// Only file and directory paths matching the rules are returned. By default,
/// ignore files like `.gitignore` are respected. The precise matching rules
/// and precedence is explained in the documentation for `WalkBuilder`.
pub struct Walk {
its: vec::IntoIter<(PathBuf, Option<WalkEventIter>)>,
it: Option<WalkEventIter>,
ig_root: Ignore,
ig: Ignore,
max_filesize: Option<u64>,
parents: bool,
}
impl Walk {
/// Creates a new recursive directory iterator for the file path given.
///
/// Note that this uses default settings, which include respecting
/// `.gitignore` files. To configure the iterator, use `WalkBuilder`
/// instead.
pub fn new<P: AsRef<Path>>(path: P) -> Walk {
WalkBuilder::new(path).build()
}
fn skip_entry(&self, ent: &walkdir::DirEntry) -> bool {
if ent.depth() == 0 {
return false;
}
let is_dir = ent.file_type().is_dir();
let max_size = self.max_filesize;
let should_skip_path = skip_path(&self.ig, ent.path(), is_dir);
let should_skip_filesize = if !is_dir && max_size.is_some() {
skip_filesize(max_size.unwrap(), ent.path(), &ent.metadata().ok())
} else {
false
};
should_skip_path || should_skip_filesize
}
}
impl Iterator for Walk {
type Item = Result<DirEntry, Error>;
#[inline(always)]
fn next(&mut self) -> Option<Result<DirEntry, Error>> {
loop {
let ev = match self.it.as_mut().and_then(|it| it.next()) {
Some(ev) => ev,
None => {
match self.its.next() {
None => return None,
Some((_, None)) => {
return Some(Ok(DirEntry::new_stdin()));
}
Some((path, Some(it))) => {
self.it = Some(it);
if self.parents && path.is_dir() {
let (ig, err) = self.ig_root.add_parents(path);
self.ig = ig;
if let Some(err) = err {
return Some(Err(err));
}
} else {
self.ig = self.ig_root.clone();
}
}
}
continue;
}
};
match ev {
Err(err) => {
return Some(Err(Error::from(err)));
}
Ok(WalkEvent::Exit) => {
self.ig = self.ig.parent().unwrap();
}
Ok(WalkEvent::Dir(ent)) => {
if self.skip_entry(&ent) {
self.it.as_mut().unwrap().it.skip_current_dir();
// Still need to push this on the stack because
// we'll get a WalkEvent::Exit event for this dir.
// We don't care if it errors though.
let (igtmp, _) = self.ig.add_child(ent.path());
self.ig = igtmp;
continue;
}
let (igtmp, err) = self.ig.add_child(ent.path());
self.ig = igtmp;
return Some(Ok(DirEntry::new_walkdir(ent, err)));
}
Ok(WalkEvent::File(ent)) => {
if self.skip_entry(&ent) {
continue;
}
return Some(Ok(DirEntry::new_walkdir(ent, None)));
}
}
}
}
}
/// WalkEventIter transforms a WalkDir iterator into an iterator that more
/// accurately describes the directory tree. Namely, it emits events that are
/// one of three types: directory, file or "exit." An "exit" event means that
/// the entire contents of a directory have been enumerated.
struct WalkEventIter {
depth: usize,
it: walkdir::Iter,
next: Option<Result<walkdir::DirEntry, walkdir::Error>>,
}
#[derive(Debug)]
enum WalkEvent {
Dir(walkdir::DirEntry),
File(walkdir::DirEntry),
Exit,
}
impl From<WalkDir> for WalkEventIter {
fn from(it: WalkDir) -> WalkEventIter {
WalkEventIter { depth: 0, it: it.into_iter(), next: None }
}
}
impl Iterator for WalkEventIter {
type Item = walkdir::Result<WalkEvent>;
#[inline(always)]
fn next(&mut self) -> Option<walkdir::Result<WalkEvent>> {
let dent = self.next.take().or_else(|| self.it.next());
let depth = match dent {
None => 0,
Some(Ok(ref dent)) => dent.depth(),
Some(Err(ref err)) => err.depth(),
};
if depth < self.depth {
self.depth -= 1;
self.next = dent;
return Some(Ok(WalkEvent::Exit));
}
self.depth = depth;
match dent {
None => None,
Some(Err(err)) => Some(Err(err)),
Some(Ok(dent)) => {
if dent.file_type().is_dir() {
self.depth += 1;
Some(Ok(WalkEvent::Dir(dent)))
} else {
Some(Ok(WalkEvent::File(dent)))
}
}
}
}
}
/// WalkState is used in the parallel recursive directory iterator to indicate
/// whether walking should continue as normal, skip descending into a
/// particular directory or quit the walk entirely.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum WalkState {
/// Continue walking as normal.
Continue,
/// If the directory entry given is a directory, don't descend into it.
/// In all other cases, this has no effect.
Skip,
/// Quit the entire iterator as soon as possible.
///
/// Note that this is an inherently asynchronous action. It is possible
/// for more entries to be yielded even after instructing the iterator
/// to quit.
Quit,
}
impl WalkState {
fn is_quit(&self) -> bool {
*self == WalkState::Quit
}
}
/// WalkParallel is a parallel recursive directory iterator over files paths
/// in one or more directories.
///
/// Only file and directory paths matching the rules are returned. By default,
/// ignore files like `.gitignore` are respected. The precise matching rules
/// and precedence is explained in the documentation for `WalkBuilder`.
///
/// Unlike `Walk`, this uses multiple threads for traversing a directory.
pub struct WalkParallel {
paths: vec::IntoIter<PathBuf>,
ig_root: Ignore,
parents: bool,
max_filesize: Option<u64>,
max_depth: Option<usize>,
follow_links: bool,
threads: usize,
}
impl WalkParallel {
/// Execute the parallel recursive directory iterator. `mkf` is called
/// for each thread used for iteration. The function produced by `mkf`
/// is then in turn called for each visited file path.
pub fn run<F>(
self,
mut mkf: F,
) where F: FnMut() -> Box<FnMut(Result<DirEntry, Error>) -> WalkState + Send + 'static> {
let mut f = mkf();
let threads = self.threads();
let queue = Arc::new(MsQueue::new());
let mut any_work = false;
// Send the initial set of root paths to the pool of workers.
// Note that we only send directories. For files, we send to them the
// callback directly.
for path in self.paths {
let dent =
if path == Path::new("-") {
DirEntry::new_stdin()
} else {
match DirEntryRaw::from_link(0, path) {
Ok(dent) => DirEntry::new_raw(dent, None),
Err(err) => {
if f(Err(err)).is_quit() {
return;
}
continue;
}
}
};
queue.push(Message::Work(Work {
dent: dent,
ignore: self.ig_root.clone(),
}));
any_work = true;
}
// ... but there's no need to start workers if we don't need them.
if !any_work {
return;
}
// Create the workers and then wait for them to finish.
let num_waiting = Arc::new(AtomicUsize::new(0));
let num_quitting = Arc::new(AtomicUsize::new(0));
let quit_now = Arc::new(AtomicBool::new(false));
let mut handles = vec![];
for _ in 0..threads {
let worker = Worker {
f: mkf(),
queue: queue.clone(),
quit_now: quit_now.clone(),
is_waiting: false,
is_quitting: false,
num_waiting: num_waiting.clone(),
num_quitting: num_quitting.clone(),
threads: threads,
parents: self.parents,
max_depth: self.max_depth,
max_filesize: self.max_filesize,
follow_links: self.follow_links,
};
handles.push(thread::spawn(|| worker.run()));
}
for handle in handles {
handle.join().unwrap();
}
}
fn threads(&self) -> usize {
if self.threads == 0 {
2
} else {
self.threads
}
}
}
/// Message is the set of instructions that a worker knows how to process.
enum Message {
/// A work item corresponds to a directory that should be descended into.
/// Work items for entries that should be skipped or ignored should not
/// be produced.
Work(Work),
/// This instruction indicates that the worker should start quitting.
Quit,
}
/// A unit of work for each worker to process.
///
/// Each unit of work corresponds to a directory that should be descended
/// into.
struct Work {
/// The directory entry.
dent: DirEntry,
/// Any ignore matchers that have been built for this directory's parents.
ignore: Ignore,
}
impl Work {
/// Returns true if and only if this work item is a directory.
fn is_dir(&self) -> bool {
self.dent.file_type().map_or(false, |t| t.is_dir())
}
/// Adds ignore rules for parent directories.
///
/// Note that this only applies to entries at depth 0. On all other
/// entries, this is a no-op.
fn add_parents(&mut self) -> Option<Error> {
if self.dent.depth() > 0 {
return None;
}
// At depth 0, the path of this entry is a root path, so we can
// use it directly to add parent ignore rules.
let (ig, err) = self.ignore.add_parents(self.dent.path());
self.ignore = ig;
err
}
/// Reads the directory contents of this work item and adds ignore
/// rules for this directory.
///
/// If there was a problem with reading the directory contents, then
/// an error is returned. If there was a problem reading the ignore
/// rules for this directory, then the error is attached to this
/// work item's directory entry.
fn read_dir(&mut self) -> Result<fs::ReadDir, Error> {
let readdir = match fs::read_dir(self.dent.path()) {
Ok(readdir) => readdir,
Err(err) => {
let err = Error::from(err)
.with_path(self.dent.path())
.with_depth(self.dent.depth());
return Err(err);
}
};
let (ig, err) = self.ignore.add_child(self.dent.path());
self.ignore = ig;
self.dent.err = err;
Ok(readdir)
}
}
/// A worker is responsible for descending into directories, updating the
/// ignore matchers, producing new work and invoking the caller's callback.