walkdir/
lib.rs

1/*!
2Crate `walkdir` provides an efficient and cross platform implementation
3of recursive directory traversal. Several options are exposed to control
4iteration, such as whether to follow symbolic links (default off), limit the
5maximum number of simultaneous open file descriptors and the ability to
6efficiently skip descending into directories.
7
8To use this crate, add `walkdir` as a dependency to your project's
9`Cargo.toml`:
10
11```toml
12[dependencies]
13walkdir = "2"
14```
15
16# From the top
17
18The [`WalkDir`] type builds iterators. The [`DirEntry`] type describes values
19yielded by the iterator. Finally, the [`Error`] type is a small wrapper around
20[`std::io::Error`] with additional information, such as if a loop was detected
21while following symbolic links (not enabled by default).
22
23[`WalkDir`]: struct.WalkDir.html
24[`DirEntry`]: struct.DirEntry.html
25[`Error`]: struct.Error.html
26[`std::io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html
27
28# Example
29
30The following code recursively iterates over the directory given and prints
31the path for each entry:
32
33```no_run
34use walkdir::WalkDir;
35# use walkdir::Error;
36
37# fn try_main() -> Result<(), Error> {
38for entry in WalkDir::new("foo") {
39    println!("{}", entry?.path().display());
40}
41# Ok(())
42# }
43```
44
45Or, if you'd like to iterate over all entries and ignore any errors that
46may arise, use [`filter_map`]. (e.g., This code below will silently skip
47directories that the owner of the running process does not have permission to
48access.)
49
50```no_run
51use walkdir::WalkDir;
52
53for entry in WalkDir::new("foo").into_iter().filter_map(|e| e.ok()) {
54    println!("{}", entry.path().display());
55}
56```
57
58[`filter_map`]: https://doc.rust-lang.org/stable/std/iter/trait.Iterator.html#method.filter_map
59
60# Example: follow symbolic links
61
62The same code as above, except [`follow_links`] is enabled:
63
64```no_run
65use walkdir::WalkDir;
66# use walkdir::Error;
67
68# fn try_main() -> Result<(), Error> {
69for entry in WalkDir::new("foo").follow_links(true) {
70    println!("{}", entry?.path().display());
71}
72# Ok(())
73# }
74```
75
76[`follow_links`]: struct.WalkDir.html#method.follow_links
77
78# Example: skip hidden files and directories on unix
79
80This uses the [`filter_entry`] iterator adapter to avoid yielding hidden files
81and directories efficiently (i.e. without recursing into hidden directories):
82
83```no_run
84use walkdir::{DirEntry, WalkDir};
85# use walkdir::Error;
86
87fn is_hidden(entry: &DirEntry) -> bool {
88    entry.file_name()
89         .to_str()
90         .map(|s| s.starts_with("."))
91         .unwrap_or(false)
92}
93
94# fn try_main() -> Result<(), Error> {
95let walker = WalkDir::new("foo").into_iter();
96for entry in walker.filter_entry(|e| !is_hidden(e)) {
97    println!("{}", entry?.path().display());
98}
99# Ok(())
100# }
101```
102
103[`filter_entry`]: struct.IntoIter.html#method.filter_entry
104*/
105
106#![deny(missing_docs)]
107#![allow(unknown_lints)]
108
109#[cfg(test)]
110doc_comment::doctest!("../README.md");
111
112use std::cmp::{min, Ordering};
113use std::fmt;
114use std::fs::{self, ReadDir};
115use std::io;
116use std::path::{Path, PathBuf};
117use std::result;
118use std::vec;
119
120use same_file::Handle;
121
122pub use crate::dent::DirEntry;
123#[cfg(unix)]
124pub use crate::dent::DirEntryExt;
125pub use crate::error::Error;
126
127mod dent;
128mod error;
129#[cfg(test)]
130mod tests;
131mod util;
132
133/// Like try, but for iterators that return [`Option<Result<_, _>>`].
134///
135/// [`Option<Result<_, _>>`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html
136macro_rules! itry {
137    ($e:expr) => {
138        match $e {
139            Ok(v) => v,
140            Err(err) => return Some(Err(From::from(err))),
141        }
142    };
143}
144
145/// A result type for walkdir operations.
146///
147/// Note that this result type embeds the error type in this crate. This
148/// is only useful if you care about the additional information provided by
149/// the error (such as the path associated with the error or whether a loop
150/// was dectected). If you want things to Just Work, then you can use
151/// [`io::Result`] instead since the error type in this package will
152/// automatically convert to an [`io::Result`] when using the [`try!`] macro.
153///
154/// [`io::Result`]: https://doc.rust-lang.org/stable/std/io/type.Result.html
155/// [`try!`]: https://doc.rust-lang.org/stable/std/macro.try.html
156pub type Result<T> = ::std::result::Result<T, Error>;
157
158/// A builder to create an iterator for recursively walking a directory.
159///
160/// Results are returned in depth first fashion, with directories yielded
161/// before their contents. If [`contents_first`] is true, contents are yielded
162/// before their directories. The order is unspecified but if [`sort_by`] is
163/// given, directory entries are sorted according to this function. Directory
164/// entries `.` and `..` are always omitted.
165///
166/// If an error occurs at any point during iteration, then it is returned in
167/// place of its corresponding directory entry and iteration continues as
168/// normal. If an error occurs while opening a directory for reading, then it
169/// is not descended into (but the error is still yielded by the iterator).
170/// Iteration may be stopped at any time. When the iterator is destroyed, all
171/// resources associated with it are freed.
172///
173/// [`contents_first`]: struct.WalkDir.html#method.contents_first
174/// [`sort_by`]: struct.WalkDir.html#method.sort_by
175///
176/// # Usage
177///
178/// This type implements [`IntoIterator`] so that it may be used as the subject
179/// of a `for` loop. You may need to call [`into_iter`] explicitly if you want
180/// to use iterator adapters such as [`filter_entry`].
181///
182/// Idiomatic use of this type should use method chaining to set desired
183/// options. For example, this only shows entries with a depth of `1`, `2` or
184/// `3` (relative to `foo`):
185///
186/// ```no_run
187/// use walkdir::WalkDir;
188/// # use walkdir::Error;
189///
190/// # fn try_main() -> Result<(), Error> {
191/// for entry in WalkDir::new("foo").min_depth(1).max_depth(3) {
192///     println!("{}", entry?.path().display());
193/// }
194/// # Ok(())
195/// # }
196/// ```
197///
198/// [`IntoIterator`]: https://doc.rust-lang.org/stable/std/iter/trait.IntoIterator.html
199/// [`into_iter`]: https://doc.rust-lang.org/nightly/core/iter/trait.IntoIterator.html#tymethod.into_iter
200/// [`filter_entry`]: struct.IntoIter.html#method.filter_entry
201///
202/// Note that the iterator by default includes the top-most directory. Since
203/// this is the only directory yielded with depth `0`, it is easy to ignore it
204/// with the [`min_depth`] setting:
205///
206/// ```no_run
207/// use walkdir::WalkDir;
208/// # use walkdir::Error;
209///
210/// # fn try_main() -> Result<(), Error> {
211/// for entry in WalkDir::new("foo").min_depth(1) {
212///     println!("{}", entry?.path().display());
213/// }
214/// # Ok(())
215/// # }
216/// ```
217///
218/// [`min_depth`]: struct.WalkDir.html#method.min_depth
219///
220/// This will only return descendents of the `foo` directory and not `foo`
221/// itself.
222///
223/// # Loops
224///
225/// This iterator (like most/all recursive directory iterators) assumes that
226/// no loops can be made with *hard* links on your file system. In particular,
227/// this would require creating a hard link to a directory such that it creates
228/// a loop. On most platforms, this operation is illegal.
229///
230/// Note that when following symbolic/soft links, loops are detected and an
231/// error is reported.
232#[derive(Debug)]
233pub struct WalkDir {
234    opts: WalkDirOptions,
235    root: PathBuf,
236}
237
238struct WalkDirOptions {
239    follow_links: bool,
240    max_open: usize,
241    min_depth: usize,
242    max_depth: usize,
243    sorter: Option<
244        Box<
245            dyn FnMut(&DirEntry, &DirEntry) -> Ordering
246                + Send
247                + Sync
248                + 'static,
249        >,
250    >,
251    contents_first: bool,
252    same_file_system: bool,
253}
254
255impl fmt::Debug for WalkDirOptions {
256    fn fmt(
257        &self,
258        f: &mut fmt::Formatter<'_>,
259    ) -> result::Result<(), fmt::Error> {
260        let sorter_str = if self.sorter.is_some() {
261            // FnMut isn't `Debug`
262            "Some(...)"
263        } else {
264            "None"
265        };
266        f.debug_struct("WalkDirOptions")
267            .field("follow_links", &self.follow_links)
268            .field("max_open", &self.max_open)
269            .field("min_depth", &self.min_depth)
270            .field("max_depth", &self.max_depth)
271            .field("sorter", &sorter_str)
272            .field("contents_first", &self.contents_first)
273            .field("same_file_system", &self.same_file_system)
274            .finish()
275    }
276}
277
278impl WalkDir {
279    /// Create a builder for a recursive directory iterator starting at the
280    /// file path `root`. If `root` is a directory, then it is the first item
281    /// yielded by the iterator. If `root` is a file, then it is the first
282    /// and only item yielded by the iterator. If `root` is a symlink, then it
283    /// is always followed for the purposes of directory traversal. (A root
284    /// `DirEntry` still obeys its documentation with respect to symlinks and
285    /// the `follow_links` setting.)
286    pub fn new<P: AsRef<Path>>(root: P) -> Self {
287        WalkDir {
288            opts: WalkDirOptions {
289                follow_links: false,
290                max_open: 10,
291                min_depth: 0,
292                max_depth: ::std::usize::MAX,
293                sorter: None,
294                contents_first: false,
295                same_file_system: false,
296            },
297            root: root.as_ref().to_path_buf(),
298        }
299    }
300
301    /// Set the minimum depth of entries yielded by the iterator.
302    ///
303    /// The smallest depth is `0` and always corresponds to the path given
304    /// to the `new` function on this type. Its direct descendents have depth
305    /// `1`, and their descendents have depth `2`, and so on.
306    pub fn min_depth(mut self, depth: usize) -> Self {
307        self.opts.min_depth = depth;
308        if self.opts.min_depth > self.opts.max_depth {
309            self.opts.min_depth = self.opts.max_depth;
310        }
311        self
312    }
313
314    /// Set the maximum depth of entries yield by the iterator.
315    ///
316    /// The smallest depth is `0` and always corresponds to the path given
317    /// to the `new` function on this type. Its direct descendents have depth
318    /// `1`, and their descendents have depth `2`, and so on.
319    ///
320    /// Note that this will not simply filter the entries of the iterator, but
321    /// it will actually avoid descending into directories when the depth is
322    /// exceeded.
323    pub fn max_depth(mut self, depth: usize) -> Self {
324        self.opts.max_depth = depth;
325        if self.opts.max_depth < self.opts.min_depth {
326            self.opts.max_depth = self.opts.min_depth;
327        }
328        self
329    }
330
331    /// Follow symbolic links. By default, this is disabled.
332    ///
333    /// When `yes` is `true`, symbolic links are followed as if they were
334    /// normal directories and files. If a symbolic link is broken or is
335    /// involved in a loop, an error is yielded.
336    ///
337    /// When enabled, the yielded [`DirEntry`] values represent the target of
338    /// the link while the path corresponds to the link. See the [`DirEntry`]
339    /// type for more details.
340    ///
341    /// [`DirEntry`]: struct.DirEntry.html
342    pub fn follow_links(mut self, yes: bool) -> Self {
343        self.opts.follow_links = yes;
344        self
345    }
346
347    /// Set the maximum number of simultaneously open file descriptors used
348    /// by the iterator.
349    ///
350    /// `n` must be greater than or equal to `1`. If `n` is `0`, then it is set
351    /// to `1` automatically. If this is not set, then it defaults to some
352    /// reasonably low number.
353    ///
354    /// This setting has no impact on the results yielded by the iterator
355    /// (even when `n` is `1`). Instead, this setting represents a trade off
356    /// between scarce resources (file descriptors) and memory. Namely, when
357    /// the maximum number of file descriptors is reached and a new directory
358    /// needs to be opened to continue iteration, then a previous directory
359    /// handle is closed and has its unyielded entries stored in memory. In
360    /// practice, this is a satisfying trade off because it scales with respect
361    /// to the *depth* of your file tree. Therefore, low values (even `1`) are
362    /// acceptable.
363    ///
364    /// Note that this value does not impact the number of system calls made by
365    /// an exhausted iterator.
366    ///
367    /// # Platform behavior
368    ///
369    /// On Windows, if `follow_links` is enabled, then this limit is not
370    /// respected. In particular, the maximum number of file descriptors opened
371    /// is proportional to the depth of the directory tree traversed.
372    pub fn max_open(mut self, mut n: usize) -> Self {
373        if n == 0 {
374            n = 1;
375        }
376        self.opts.max_open = n;
377        self
378    }
379
380    /// Set a function for sorting directory entries.
381    ///
382    /// If a compare function is set, the resulting iterator will return all
383    /// paths in sorted order. The compare function will be called to compare
384    /// entries from the same directory.
385    ///
386    /// ```rust,no-run
387    /// use std::cmp;
388    /// use std::ffi::OsString;
389    /// use walkdir::WalkDir;
390    ///
391    /// WalkDir::new("foo").sort_by(|a,b| a.file_name().cmp(b.file_name()));
392    /// ```
393    pub fn sort_by<F>(mut self, cmp: F) -> Self
394    where
395        F: FnMut(&DirEntry, &DirEntry) -> Ordering + Send + Sync + 'static,
396    {
397        self.opts.sorter = Some(Box::new(cmp));
398        self
399    }
400
401    /// Yield a directory's contents before the directory itself. By default,
402    /// this is disabled.
403    ///
404    /// When `yes` is `false` (as is the default), the directory is yielded
405    /// before its contents are read. This is useful when, e.g. you want to
406    /// skip processing of some directories.
407    ///
408    /// When `yes` is `true`, the iterator yields the contents of a directory
409    /// before yielding the directory itself. This is useful when, e.g. you
410    /// want to recursively delete a directory.
411    ///
412    /// # Example
413    ///
414    /// Assume the following directory tree:
415    ///
416    /// ```text
417    /// foo/
418    ///   abc/
419    ///     qrs
420    ///     tuv
421    ///   def/
422    /// ```
423    ///
424    /// With contents_first disabled (the default), the following code visits
425    /// the directory tree in depth-first order:
426    ///
427    /// ```no_run
428    /// use walkdir::WalkDir;
429    ///
430    /// for entry in WalkDir::new("foo") {
431    ///     let entry = entry.unwrap();
432    ///     println!("{}", entry.path().display());
433    /// }
434    ///
435    /// // foo
436    /// // foo/abc
437    /// // foo/abc/qrs
438    /// // foo/abc/tuv
439    /// // foo/def
440    /// ```
441    ///
442    /// With contents_first enabled:
443    ///
444    /// ```no_run
445    /// use walkdir::WalkDir;
446    ///
447    /// for entry in WalkDir::new("foo").contents_first(true) {
448    ///     let entry = entry.unwrap();
449    ///     println!("{}", entry.path().display());
450    /// }
451    ///
452    /// // foo/abc/qrs
453    /// // foo/abc/tuv
454    /// // foo/abc
455    /// // foo/def
456    /// // foo
457    /// ```
458    pub fn contents_first(mut self, yes: bool) -> Self {
459        self.opts.contents_first = yes;
460        self
461    }
462
463    /// Do not cross file system boundaries.
464    ///
465    /// When this option is enabled, directory traversal will not descend into
466    /// directories that are on a different file system from the root path.
467    ///
468    /// Currently, this option is only supported on Unix and Windows. If this
469    /// option is used on an unsupported platform, then directory traversal
470    /// will immediately return an error and will not yield any entries.
471    pub fn same_file_system(mut self, yes: bool) -> Self {
472        self.opts.same_file_system = yes;
473        self
474    }
475}
476
477impl IntoIterator for WalkDir {
478    type Item = Result<DirEntry>;
479    type IntoIter = IntoIter;
480
481    fn into_iter(self) -> IntoIter {
482        IntoIter {
483            opts: self.opts,
484            start: Some(self.root),
485            stack_list: vec![],
486            stack_path: vec![],
487            oldest_opened: 0,
488            depth: 0,
489            deferred_dirs: vec![],
490            root_device: None,
491        }
492    }
493}
494
495/// An iterator for recursively descending into a directory.
496///
497/// A value with this type must be constructed with the [`WalkDir`] type, which
498/// uses a builder pattern to set options such as min/max depth, max open file
499/// descriptors and whether the iterator should follow symbolic links. After
500/// constructing a `WalkDir`, call [`.into_iter()`] at the end of the chain.
501///
502/// The order of elements yielded by this iterator is unspecified.
503///
504/// [`WalkDir`]: struct.WalkDir.html
505/// [`.into_iter()`]: struct.WalkDir.html#into_iter.v
506#[derive(Debug)]
507pub struct IntoIter {
508    /// Options specified in the builder. Depths, max fds, etc.
509    opts: WalkDirOptions,
510    /// The start path.
511    ///
512    /// This is only `Some(...)` at the beginning. After the first iteration,
513    /// this is always `None`.
514    start: Option<PathBuf>,
515    /// A stack of open (up to max fd) or closed handles to directories.
516    /// An open handle is a plain [`fs::ReadDir`] while a closed handle is
517    /// a `Vec<fs::DirEntry>` corresponding to the as-of-yet consumed entries.
518    ///
519    /// [`fs::ReadDir`]: https://doc.rust-lang.org/stable/std/fs/struct.ReadDir.html
520    stack_list: Vec<DirList>,
521    /// A stack of file paths.
522    ///
523    /// This is *only* used when [`follow_links`] is enabled. In all other
524    /// cases this stack is empty.
525    ///
526    /// [`follow_links`]: struct.WalkDir.html#method.follow_links
527    stack_path: Vec<Ancestor>,
528    /// An index into `stack_list` that points to the oldest open directory
529    /// handle. If the maximum fd limit is reached and a new directory needs to
530    /// be read, the handle at this index is closed before the new directory is
531    /// opened.
532    oldest_opened: usize,
533    /// The current depth of iteration (the length of the stack at the
534    /// beginning of each iteration).
535    depth: usize,
536    /// A list of DirEntries corresponding to directories, that are
537    /// yielded after their contents has been fully yielded. This is only
538    /// used when `contents_first` is enabled.
539    deferred_dirs: Vec<DirEntry>,
540    /// The device of the root file path when the first call to `next` was
541    /// made.
542    ///
543    /// If the `same_file_system` option isn't enabled, then this is always
544    /// `None`. Conversely, if it is enabled, this is always `Some(...)` after
545    /// handling the root path.
546    root_device: Option<u64>,
547}
548
549/// An ancestor is an item in the directory tree traversed by walkdir, and is
550/// used to check for loops in the tree when traversing symlinks.
551#[derive(Debug)]
552struct Ancestor {
553    /// The path of this ancestor.
554    path: PathBuf,
555    /// An open file to this ancesor. This is only used on Windows where
556    /// opening a file handle appears to be quite expensive, so we choose to
557    /// cache it. This comes at the cost of not respecting the file descriptor
558    /// limit set by the user.
559    #[cfg(windows)]
560    handle: Handle,
561}
562
563impl Ancestor {
564    /// Create a new ancestor from the given directory path.
565    #[cfg(windows)]
566    fn new(dent: &DirEntry) -> io::Result<Ancestor> {
567        let handle = Handle::from_path(dent.path())?;
568        Ok(Ancestor { path: dent.path().to_path_buf(), handle: handle })
569    }
570
571    /// Create a new ancestor from the given directory path.
572    #[cfg(not(windows))]
573    fn new(dent: &DirEntry) -> io::Result<Ancestor> {
574        Ok(Ancestor { path: dent.path().to_path_buf() })
575    }
576
577    /// Returns true if and only if the given open file handle corresponds to
578    /// the same directory as this ancestor.
579    #[cfg(windows)]
580    fn is_same(&self, child: &Handle) -> io::Result<bool> {
581        Ok(child == &self.handle)
582    }
583
584    /// Returns true if and only if the given open file handle corresponds to
585    /// the same directory as this ancestor.
586    #[cfg(not(windows))]
587    fn is_same(&self, child: &Handle) -> io::Result<bool> {
588        Ok(child == &Handle::from_path(&self.path)?)
589    }
590}
591
592/// A sequence of unconsumed directory entries.
593///
594/// This represents the opened or closed state of a directory handle. When
595/// open, future entries are read by iterating over the raw `fs::ReadDir`.
596/// When closed, all future entries are read into memory. Iteration then
597/// proceeds over a [`Vec<fs::DirEntry>`].
598///
599/// [`fs::ReadDir`]: https://doc.rust-lang.org/stable/std/fs/struct.ReadDir.html
600/// [`Vec<fs::DirEntry>`]: https://doc.rust-lang.org/stable/std/vec/struct.Vec.html
601#[derive(Debug)]
602enum DirList {
603    /// An opened handle.
604    ///
605    /// This includes the depth of the handle itself.
606    ///
607    /// If there was an error with the initial [`fs::read_dir`] call, then it
608    /// is stored here. (We use an [`Option<...>`] to make yielding the error
609    /// exactly once simpler.)
610    ///
611    /// [`fs::read_dir`]: https://doc.rust-lang.org/stable/std/fs/fn.read_dir.html
612    /// [`Option<...>`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html
613    Opened { depth: usize, it: result::Result<ReadDir, Option<Error>> },
614    /// A closed handle.
615    ///
616    /// All remaining directory entries are read into memory.
617    Closed(vec::IntoIter<Result<DirEntry>>),
618}
619
620impl Iterator for IntoIter {
621    type Item = Result<DirEntry>;
622    /// Advances the iterator and returns the next value.
623    ///
624    /// # Errors
625    ///
626    /// If the iterator fails to retrieve the next value, this method returns
627    /// an error value. The error will be wrapped in an Option::Some.
628    fn next(&mut self) -> Option<Result<DirEntry>> {
629        if let Some(start) = self.start.take() {
630            if self.opts.same_file_system {
631                let result = util::device_num(&start)
632                    .map_err(|e| Error::from_path(0, start.clone(), e));
633                self.root_device = Some(itry!(result));
634            }
635            let dent = itry!(DirEntry::from_path(0, start, false));
636            if let Some(result) = self.handle_entry(dent) {
637                return Some(result);
638            }
639        }
640        while !self.stack_list.is_empty() {
641            self.depth = self.stack_list.len();
642            if let Some(dentry) = self.get_deferred_dir() {
643                return Some(Ok(dentry));
644            }
645            if self.depth > self.opts.max_depth {
646                // If we've exceeded the max depth, pop the current dir
647                // so that we don't descend.
648                self.pop();
649                continue;
650            }
651            // Unwrap is safe here because we've verified above that
652            // `self.stack_list` is not empty
653            let next = self
654                .stack_list
655                .last_mut()
656                .expect("BUG: stack should be non-empty")
657                .next();
658            match next {
659                None => self.pop(),
660                Some(Err(err)) => return Some(Err(err)),
661                Some(Ok(dent)) => {
662                    if let Some(result) = self.handle_entry(dent) {
663                        return Some(result);
664                    }
665                }
666            }
667        }
668        if self.opts.contents_first {
669            self.depth = self.stack_list.len();
670            if let Some(dentry) = self.get_deferred_dir() {
671                return Some(Ok(dentry));
672            }
673        }
674        None
675    }
676}
677
678impl IntoIter {
679    /// Skips the current directory.
680    ///
681    /// This causes the iterator to stop traversing the contents of the least
682    /// recently yielded directory. This means any remaining entries in that
683    /// directory will be skipped (including sub-directories).
684    ///
685    /// Note that the ergonomics of this method are questionable since it
686    /// borrows the iterator mutably. Namely, you must write out the looping
687    /// condition manually. For example, to skip hidden entries efficiently on
688    /// unix systems:
689    ///
690    /// ```no_run
691    /// use walkdir::{DirEntry, WalkDir};
692    ///
693    /// fn is_hidden(entry: &DirEntry) -> bool {
694    ///     entry.file_name()
695    ///          .to_str()
696    ///          .map(|s| s.starts_with("."))
697    ///          .unwrap_or(false)
698    /// }
699    ///
700    /// let mut it = WalkDir::new("foo").into_iter();
701    /// loop {
702    ///     let entry = match it.next() {
703    ///         None => break,
704    ///         Some(Err(err)) => panic!("ERROR: {}", err),
705    ///         Some(Ok(entry)) => entry,
706    ///     };
707    ///     if is_hidden(&entry) {
708    ///         if entry.file_type().is_dir() {
709    ///             it.skip_current_dir();
710    ///         }
711    ///         continue;
712    ///     }
713    ///     println!("{}", entry.path().display());
714    /// }
715    /// ```
716    ///
717    /// You may find it more convenient to use the [`filter_entry`] iterator
718    /// adapter. (See its documentation for the same example functionality as
719    /// above.)
720    ///
721    /// [`filter_entry`]: #method.filter_entry
722    pub fn skip_current_dir(&mut self) {
723        if !self.stack_list.is_empty() {
724            self.pop();
725        }
726    }
727
728    /// Yields only entries which satisfy the given predicate and skips
729    /// descending into directories that do not satisfy the given predicate.
730    ///
731    /// The predicate is applied to all entries. If the predicate is
732    /// true, iteration carries on as normal. If the predicate is false, the
733    /// entry is ignored and if it is a directory, it is not descended into.
734    ///
735    /// This is often more convenient to use than [`skip_current_dir`]. For
736    /// example, to skip hidden files and directories efficiently on unix
737    /// systems:
738    ///
739    /// ```no_run
740    /// use walkdir::{DirEntry, WalkDir};
741    /// # use walkdir::Error;
742    ///
743    /// fn is_hidden(entry: &DirEntry) -> bool {
744    ///     entry.file_name()
745    ///          .to_str()
746    ///          .map(|s| s.starts_with("."))
747    ///          .unwrap_or(false)
748    /// }
749    ///
750    /// # fn try_main() -> Result<(), Error> {
751    /// for entry in WalkDir::new("foo")
752    ///                      .into_iter()
753    ///                      .filter_entry(|e| !is_hidden(e)) {
754    ///     println!("{}", entry?.path().display());
755    /// }
756    /// # Ok(())
757    /// # }
758    /// ```
759    ///
760    /// Note that the iterator will still yield errors for reading entries that
761    /// may not satisfy the predicate.
762    ///
763    /// Note that entries skipped with [`min_depth`] and [`max_depth`] are not
764    /// passed to this predicate.
765    ///
766    /// Note that if the iterator has `contents_first` enabled, then this
767    /// method is no different than calling the standard `Iterator::filter`
768    /// method (because directory entries are yielded after they've been
769    /// descended into).
770    ///
771    /// [`skip_current_dir`]: #method.skip_current_dir
772    /// [`min_depth`]: struct.WalkDir.html#method.min_depth
773    /// [`max_depth`]: struct.WalkDir.html#method.max_depth
774    pub fn filter_entry<P>(self, predicate: P) -> FilterEntry<Self, P>
775    where
776        P: FnMut(&DirEntry) -> bool,
777    {
778        FilterEntry { it: self, predicate: predicate }
779    }
780
781    fn handle_entry(
782        &mut self,
783        mut dent: DirEntry,
784    ) -> Option<Result<DirEntry>> {
785        if self.opts.follow_links && dent.file_type().is_symlink() {
786            dent = itry!(self.follow(dent));
787        }
788        let is_normal_dir = !dent.file_type().is_symlink() && dent.is_dir();
789        if is_normal_dir {
790            if self.opts.same_file_system && dent.depth() > 0 {
791                if itry!(self.is_same_file_system(&dent)) {
792                    itry!(self.push(&dent));
793                }
794            } else {
795                itry!(self.push(&dent));
796            }
797        } else if dent.depth() == 0 && dent.file_type().is_symlink() {
798            // As a special case, if we are processing a root entry, then we
799            // always follow it even if it's a symlink and follow_links is
800            // false. We are careful to not let this change the semantics of
801            // the DirEntry however. Namely, the DirEntry should still respect
802            // the follow_links setting. When it's disabled, it should report
803            // itself as a symlink. When it's enabled, it should always report
804            // itself as the target.
805            let md = itry!(fs::metadata(dent.path()).map_err(|err| {
806                Error::from_path(dent.depth(), dent.path().to_path_buf(), err)
807            }));
808            if md.file_type().is_dir() {
809                itry!(self.push(&dent));
810            }
811        }
812        if is_normal_dir && self.opts.contents_first {
813            self.deferred_dirs.push(dent);
814            None
815        } else if self.skippable() {
816            None
817        } else {
818            Some(Ok(dent))
819        }
820    }
821
822    fn get_deferred_dir(&mut self) -> Option<DirEntry> {
823        if self.opts.contents_first {
824            if self.depth < self.deferred_dirs.len() {
825                // Unwrap is safe here because we've guaranteed that
826                // `self.deferred_dirs.len()` can never be less than 1
827                let deferred: DirEntry = self
828                    .deferred_dirs
829                    .pop()
830                    .expect("BUG: deferred_dirs should be non-empty");
831                if !self.skippable() {
832                    return Some(deferred);
833                }
834            }
835        }
836        None
837    }
838
839    fn push(&mut self, dent: &DirEntry) -> Result<()> {
840        // Make room for another open file descriptor if we've hit the max.
841        let free =
842            self.stack_list.len().checked_sub(self.oldest_opened).unwrap();
843        if free == self.opts.max_open {
844            self.stack_list[self.oldest_opened].close();
845        }
846        // Open a handle to reading the directory's entries.
847        let rd = fs::read_dir(dent.path()).map_err(|err| {
848            Some(Error::from_path(self.depth, dent.path().to_path_buf(), err))
849        });
850        let mut list = DirList::Opened { depth: self.depth, it: rd };
851        if let Some(ref mut cmp) = self.opts.sorter {
852            let mut entries: Vec<_> = list.collect();
853            entries.sort_by(|a, b| match (a, b) {
854                (&Ok(ref a), &Ok(ref b)) => cmp(a, b),
855                (&Err(_), &Err(_)) => Ordering::Equal,
856                (&Ok(_), &Err(_)) => Ordering::Greater,
857                (&Err(_), &Ok(_)) => Ordering::Less,
858            });
859            list = DirList::Closed(entries.into_iter());
860        }
861        if self.opts.follow_links {
862            let ancestor = Ancestor::new(&dent)
863                .map_err(|err| Error::from_io(self.depth, err))?;
864            self.stack_path.push(ancestor);
865        }
866        // We push this after stack_path since creating the Ancestor can fail.
867        // If it fails, then we return the error and won't descend.
868        self.stack_list.push(list);
869        // If we had to close out a previous directory stream, then we need to
870        // increment our index the oldest still-open stream. We do this only
871        // after adding to our stack, in order to ensure that the oldest_opened
872        // index remains valid. The worst that can happen is that an already
873        // closed stream will be closed again, which is a no-op.
874        //
875        // We could move the close of the stream above into this if-body, but
876        // then we would have more than the maximum number of file descriptors
877        // open at a particular point in time.
878        if free == self.opts.max_open {
879            // Unwrap is safe here because self.oldest_opened is guaranteed to
880            // never be greater than `self.stack_list.len()`, which implies
881            // that the subtraction won't underflow and that adding 1 will
882            // never overflow.
883            self.oldest_opened = self.oldest_opened.checked_add(1).unwrap();
884        }
885        Ok(())
886    }
887
888    fn pop(&mut self) {
889        self.stack_list.pop().expect("BUG: cannot pop from empty stack");
890        if self.opts.follow_links {
891            self.stack_path.pop().expect("BUG: list/path stacks out of sync");
892        }
893        // If everything in the stack is already closed, then there is
894        // room for at least one more open descriptor and it will
895        // always be at the top of the stack.
896        self.oldest_opened = min(self.oldest_opened, self.stack_list.len());
897    }
898
899    fn follow(&self, mut dent: DirEntry) -> Result<DirEntry> {
900        dent =
901            DirEntry::from_path(self.depth, dent.path().to_path_buf(), true)?;
902        // The only way a symlink can cause a loop is if it points
903        // to a directory. Otherwise, it always points to a leaf
904        // and we can omit any loop checks.
905        if dent.is_dir() {
906            self.check_loop(dent.path())?;
907        }
908        Ok(dent)
909    }
910
911    fn check_loop<P: AsRef<Path>>(&self, child: P) -> Result<()> {
912        let hchild = Handle::from_path(&child)
913            .map_err(|err| Error::from_io(self.depth, err))?;
914        for ancestor in self.stack_path.iter().rev() {
915            let is_same = ancestor
916                .is_same(&hchild)
917                .map_err(|err| Error::from_io(self.depth, err))?;
918            if is_same {
919                return Err(Error::from_loop(
920                    self.depth,
921                    &ancestor.path,
922                    child.as_ref(),
923                ));
924            }
925        }
926        Ok(())
927    }
928
929    fn is_same_file_system(&mut self, dent: &DirEntry) -> Result<bool> {
930        let dent_device = util::device_num(dent.path())
931            .map_err(|err| Error::from_entry(dent, err))?;
932        Ok(self
933            .root_device
934            .map(|d| d == dent_device)
935            .expect("BUG: called is_same_file_system without root device"))
936    }
937
938    fn skippable(&self) -> bool {
939        self.depth < self.opts.min_depth || self.depth > self.opts.max_depth
940    }
941}
942
943impl DirList {
944    fn close(&mut self) {
945        if let DirList::Opened { .. } = *self {
946            *self = DirList::Closed(self.collect::<Vec<_>>().into_iter());
947        }
948    }
949}
950
951impl Iterator for DirList {
952    type Item = Result<DirEntry>;
953
954    #[inline(always)]
955    fn next(&mut self) -> Option<Result<DirEntry>> {
956        match *self {
957            DirList::Closed(ref mut it) => it.next(),
958            DirList::Opened { depth, ref mut it } => match *it {
959                Err(ref mut err) => err.take().map(Err),
960                Ok(ref mut rd) => rd.next().map(|r| match r {
961                    Ok(r) => DirEntry::from_entry(depth + 1, &r),
962                    Err(err) => Err(Error::from_io(depth + 1, err)),
963                }),
964            },
965        }
966    }
967}
968
969/// A recursive directory iterator that skips entries.
970///
971/// Values of this type are created by calling [`.filter_entry()`] on an
972/// `IntoIter`, which is formed by calling [`.into_iter()`] on a `WalkDir`.
973///
974/// Directories that fail the predicate `P` are skipped. Namely, they are
975/// never yielded and never descended into.
976///
977/// Entries that are skipped with the [`min_depth`] and [`max_depth`] options
978/// are not passed through this filter.
979///
980/// If opening a handle to a directory resulted in an error, then it is yielded
981/// and no corresponding call to the predicate is made.
982///
983/// Type parameter `I` refers to the underlying iterator and `P` refers to the
984/// predicate, which is usually `FnMut(&DirEntry) -> bool`.
985///
986/// [`.filter_entry()`]: struct.IntoIter.html#method.filter_entry
987/// [`.into_iter()`]: struct.WalkDir.html#into_iter.v
988/// [`min_depth`]: struct.WalkDir.html#method.min_depth
989/// [`max_depth`]: struct.WalkDir.html#method.max_depth
990#[derive(Debug)]
991pub struct FilterEntry<I, P> {
992    it: I,
993    predicate: P,
994}
995
996impl<P> Iterator for FilterEntry<IntoIter, P>
997where
998    P: FnMut(&DirEntry) -> bool,
999{
1000    type Item = Result<DirEntry>;
1001
1002    /// Advances the iterator and returns the next value.
1003    ///
1004    /// # Errors
1005    ///
1006    /// If the iterator fails to retrieve the next value, this method returns
1007    /// an error value. The error will be wrapped in an `Option::Some`.
1008    fn next(&mut self) -> Option<Result<DirEntry>> {
1009        loop {
1010            let dent = match self.it.next() {
1011                None => return None,
1012                Some(result) => itry!(result),
1013            };
1014            if !(self.predicate)(&dent) {
1015                if dent.is_dir() {
1016                    self.it.skip_current_dir();
1017                }
1018                continue;
1019            }
1020            return Some(Ok(dent));
1021        }
1022    }
1023}
1024
1025impl<P> FilterEntry<IntoIter, P>
1026where
1027    P: FnMut(&DirEntry) -> bool,
1028{
1029    /// Yields only entries which satisfy the given predicate and skips
1030    /// descending into directories that do not satisfy the given predicate.
1031    ///
1032    /// The predicate is applied to all entries. If the predicate is
1033    /// true, iteration carries on as normal. If the predicate is false, the
1034    /// entry is ignored and if it is a directory, it is not descended into.
1035    ///
1036    /// This is often more convenient to use than [`skip_current_dir`]. For
1037    /// example, to skip hidden files and directories efficiently on unix
1038    /// systems:
1039    ///
1040    /// ```no_run
1041    /// use walkdir::{DirEntry, WalkDir};
1042    /// # use walkdir::Error;
1043    ///
1044    /// fn is_hidden(entry: &DirEntry) -> bool {
1045    ///     entry.file_name()
1046    ///          .to_str()
1047    ///          .map(|s| s.starts_with("."))
1048    ///          .unwrap_or(false)
1049    /// }
1050    ///
1051    /// # fn try_main() -> Result<(), Error> {
1052    /// for entry in WalkDir::new("foo")
1053    ///                      .into_iter()
1054    ///                      .filter_entry(|e| !is_hidden(e)) {
1055    ///     println!("{}", entry?.path().display());
1056    /// }
1057    /// # Ok(())
1058    /// # }
1059    /// ```
1060    ///
1061    /// Note that the iterator will still yield errors for reading entries that
1062    /// may not satisfy the predicate.
1063    ///
1064    /// Note that entries skipped with [`min_depth`] and [`max_depth`] are not
1065    /// passed to this predicate.
1066    ///
1067    /// Note that if the iterator has `contents_first` enabled, then this
1068    /// method is no different than calling the standard `Iterator::filter`
1069    /// method (because directory entries are yielded after they've been
1070    /// descended into).
1071    ///
1072    /// [`skip_current_dir`]: #method.skip_current_dir
1073    /// [`min_depth`]: struct.WalkDir.html#method.min_depth
1074    /// [`max_depth`]: struct.WalkDir.html#method.max_depth
1075    pub fn filter_entry(self, predicate: P) -> FilterEntry<Self, P> {
1076        FilterEntry { it: self, predicate: predicate }
1077    }
1078
1079    /// Skips the current directory.
1080    ///
1081    /// This causes the iterator to stop traversing the contents of the least
1082    /// recently yielded directory. This means any remaining entries in that
1083    /// directory will be skipped (including sub-directories).
1084    ///
1085    /// Note that the ergonomics of this method are questionable since it
1086    /// borrows the iterator mutably. Namely, you must write out the looping
1087    /// condition manually. For example, to skip hidden entries efficiently on
1088    /// unix systems:
1089    ///
1090    /// ```no_run
1091    /// use walkdir::{DirEntry, WalkDir};
1092    ///
1093    /// fn is_hidden(entry: &DirEntry) -> bool {
1094    ///     entry.file_name()
1095    ///          .to_str()
1096    ///          .map(|s| s.starts_with("."))
1097    ///          .unwrap_or(false)
1098    /// }
1099    ///
1100    /// let mut it = WalkDir::new("foo").into_iter();
1101    /// loop {
1102    ///     let entry = match it.next() {
1103    ///         None => break,
1104    ///         Some(Err(err)) => panic!("ERROR: {}", err),
1105    ///         Some(Ok(entry)) => entry,
1106    ///     };
1107    ///     if is_hidden(&entry) {
1108    ///         if entry.file_type().is_dir() {
1109    ///             it.skip_current_dir();
1110    ///         }
1111    ///         continue;
1112    ///     }
1113    ///     println!("{}", entry.path().display());
1114    /// }
1115    /// ```
1116    ///
1117    /// You may find it more convenient to use the [`filter_entry`] iterator
1118    /// adapter. (See its documentation for the same example functionality as
1119    /// above.)
1120    ///
1121    /// [`filter_entry`]: #method.filter_entry
1122    pub fn skip_current_dir(&mut self) {
1123        self.it.skip_current_dir();
1124    }
1125}