glob/
lib.rs

1// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11//! Support for matching file paths against Unix shell style patterns.
12//!
13//! The `glob` and `glob_with` functions allow querying the filesystem for all
14//! files that match a particular pattern (similar to the libc `glob` function).
15//! The methods on the `Pattern` type provide functionality for checking if
16//! individual paths match a particular pattern (similar to the libc `fnmatch`
17//! function).
18//!
19//! For consistency across platforms, and for Windows support, this module
20//! is implemented entirely in Rust rather than deferring to the libc
21//! `glob`/`fnmatch` functions.
22//!
23//! # Examples
24//!
25//! To print all jpg files in `/media/` and all of its subdirectories.
26//!
27//! ```rust,no_run
28//! use glob::glob;
29//!
30//! for entry in glob("/media/**/*.jpg").expect("Failed to read glob pattern") {
31//!     match entry {
32//!         Ok(path) => println!("{:?}", path.display()),
33//!         Err(e) => println!("{:?}", e),
34//!     }
35//! }
36//! ```
37//!
38//! To print all files containing the letter "a", case insensitive, in a `local`
39//! directory relative to the current working directory. This ignores errors
40//! instead of printing them.
41//!
42//! ```rust,no_run
43//! use glob::glob_with;
44//! use glob::MatchOptions;
45//!
46//! let options = MatchOptions {
47//!     case_sensitive: false,
48//!     require_literal_separator: false,
49//!     require_literal_leading_dot: false,
50//! };
51//! for entry in glob_with("local/*a*", options).unwrap() {
52//!     if let Ok(path) = entry {
53//!         println!("{:?}", path.display())
54//!     }
55//! }
56//! ```
57
58#![doc(
59    html_logo_url = "https://www.rust-lang.org/logos/rust-logo-128x128-blk-v2.png",
60    html_favicon_url = "https://www.rust-lang.org/favicon.ico",
61    html_root_url = "https://docs.rs/glob/0.3.0"
62)]
63#![deny(missing_docs)]
64#![cfg_attr(all(test, windows), feature(std_misc))]
65
66use std::cmp;
67use std::error::Error;
68use std::fmt;
69use std::fs;
70use std::io;
71use std::path::{self, Component, Path, PathBuf};
72use std::str::FromStr;
73
74use CharSpecifier::{CharRange, SingleChar};
75use MatchResult::{EntirePatternDoesntMatch, Match, SubPatternDoesntMatch};
76use PatternToken::AnyExcept;
77use PatternToken::{AnyChar, AnyRecursiveSequence, AnySequence, AnyWithin, Char};
78
79/// An iterator that yields `Path`s from the filesystem that match a particular
80/// pattern.
81///
82/// Note that it yields `GlobResult` in order to report any `IoErrors` that may
83/// arise during iteration. If a directory matches but is unreadable,
84/// thereby preventing its contents from being checked for matches, a
85/// `GlobError` is returned to express this.
86///
87/// See the `glob` function for more details.
88pub struct Paths {
89    dir_patterns: Vec<Pattern>,
90    require_dir: bool,
91    options: MatchOptions,
92    todo: Vec<Result<(PathBuf, usize), GlobError>>,
93    scope: Option<PathBuf>,
94}
95
96/// Return an iterator that produces all the `Path`s that match the given
97/// pattern using default match options, which may be absolute or relative to
98/// the current working directory.
99///
100/// This may return an error if the pattern is invalid.
101///
102/// This method uses the default match options and is equivalent to calling
103/// `glob_with(pattern, MatchOptions::new())`. Use `glob_with` directly if you
104/// want to use non-default match options.
105///
106/// When iterating, each result is a `GlobResult` which expresses the
107/// possibility that there was an `IoError` when attempting to read the contents
108/// of the matched path.  In other words, each item returned by the iterator
109/// will either be an `Ok(Path)` if the path matched, or an `Err(GlobError)` if
110/// the path (partially) matched _but_ its contents could not be read in order
111/// to determine if its contents matched.
112///
113/// See the `Paths` documentation for more information.
114///
115/// # Examples
116///
117/// Consider a directory `/media/pictures` containing only the files
118/// `kittens.jpg`, `puppies.jpg` and `hamsters.gif`:
119///
120/// ```rust,no_run
121/// use glob::glob;
122///
123/// for entry in glob("/media/pictures/*.jpg").unwrap() {
124///     match entry {
125///         Ok(path) => println!("{:?}", path.display()),
126///
127///         // if the path matched but was unreadable,
128///         // thereby preventing its contents from matching
129///         Err(e) => println!("{:?}", e),
130///     }
131/// }
132/// ```
133///
134/// The above code will print:
135///
136/// ```ignore
137/// /media/pictures/kittens.jpg
138/// /media/pictures/puppies.jpg
139/// ```
140///
141/// If you want to ignore unreadable paths, you can use something like
142/// `filter_map`:
143///
144/// ```rust
145/// use glob::glob;
146/// use std::result::Result;
147///
148/// for path in glob("/media/pictures/*.jpg").unwrap().filter_map(Result::ok) {
149///     println!("{}", path.display());
150/// }
151/// ```
152/// Paths are yielded in alphabetical order.
153pub fn glob(pattern: &str) -> Result<Paths, PatternError> {
154    glob_with(pattern, MatchOptions::new())
155}
156
157/// Return an iterator that produces all the `Path`s that match the given
158/// pattern using the specified match options, which may be absolute or relative
159/// to the current working directory.
160///
161/// This may return an error if the pattern is invalid.
162///
163/// This function accepts Unix shell style patterns as described by
164/// `Pattern::new(..)`.  The options given are passed through unchanged to
165/// `Pattern::matches_with(..)` with the exception that
166/// `require_literal_separator` is always set to `true` regardless of the value
167/// passed to this function.
168///
169/// Paths are yielded in alphabetical order.
170pub fn glob_with(pattern: &str, options: MatchOptions) -> Result<Paths, PatternError> {
171    #[cfg(windows)]
172    fn check_windows_verbatim(p: &Path) -> bool {
173        use std::path::Prefix;
174        match p.components().next() {
175            Some(Component::Prefix(ref p)) => p.kind().is_verbatim(),
176            _ => false,
177        }
178    }
179    #[cfg(not(windows))]
180    fn check_windows_verbatim(_: &Path) -> bool {
181        false
182    }
183
184    #[cfg(windows)]
185    fn to_scope(p: &Path) -> PathBuf {
186        // FIXME handle volume relative paths here
187        p.to_path_buf()
188    }
189    #[cfg(not(windows))]
190    fn to_scope(p: &Path) -> PathBuf {
191        p.to_path_buf()
192    }
193
194    // make sure that the pattern is valid first, else early return with error
195    if let Err(err) = Pattern::new(pattern) {
196        return Err(err);
197    }
198
199    let mut components = Path::new(pattern).components().peekable();
200    loop {
201        match components.peek() {
202            Some(&Component::Prefix(..)) | Some(&Component::RootDir) => {
203                components.next();
204            }
205            _ => break,
206        }
207    }
208    let rest = components.map(|s| s.as_os_str()).collect::<PathBuf>();
209    let normalized_pattern = Path::new(pattern).iter().collect::<PathBuf>();
210    let root_len = normalized_pattern.to_str().unwrap().len() - rest.to_str().unwrap().len();
211    let root = if root_len > 0 {
212        Some(Path::new(&pattern[..root_len]))
213    } else {
214        None
215    };
216
217    if root_len > 0 && check_windows_verbatim(root.unwrap()) {
218        // FIXME: How do we want to handle verbatim paths? I'm inclined to
219        // return nothing, since we can't very well find all UNC shares with a
220        // 1-letter server name.
221        return Ok(Paths {
222            dir_patterns: Vec::new(),
223            require_dir: false,
224            options,
225            todo: Vec::new(),
226            scope: None,
227        });
228    }
229
230    let scope = root.map_or_else(|| PathBuf::from("."), to_scope);
231
232    let mut dir_patterns = Vec::new();
233    let components =
234        pattern[cmp::min(root_len, pattern.len())..].split_terminator(path::is_separator);
235
236    for component in components {
237        dir_patterns.push(Pattern::new(component)?);
238    }
239
240    if root_len == pattern.len() {
241        dir_patterns.push(Pattern {
242            original: "".to_string(),
243            tokens: Vec::new(),
244            is_recursive: false,
245        });
246    }
247
248    let last_is_separator = pattern.chars().next_back().map(path::is_separator);
249    let require_dir = last_is_separator == Some(true);
250    let todo = Vec::new();
251
252    Ok(Paths {
253        dir_patterns,
254        require_dir,
255        options,
256        todo,
257        scope: Some(scope),
258    })
259}
260
261/// A glob iteration error.
262///
263/// This is typically returned when a particular path cannot be read
264/// to determine if its contents match the glob pattern. This is possible
265/// if the program lacks the appropriate permissions, for example.
266#[derive(Debug)]
267pub struct GlobError {
268    path: PathBuf,
269    error: io::Error,
270}
271
272impl GlobError {
273    /// The Path that the error corresponds to.
274    pub fn path(&self) -> &Path {
275        &self.path
276    }
277
278    /// The error in question.
279    pub fn error(&self) -> &io::Error {
280        &self.error
281    }
282
283    /// Consumes self, returning the _raw_ underlying `io::Error`
284    pub fn into_error(self) -> io::Error {
285        self.error
286    }
287}
288
289impl Error for GlobError {
290    fn description(&self) -> &str {
291        self.error.description()
292    }
293
294    fn cause(&self) -> Option<&Error> {
295        Some(&self.error)
296    }
297}
298
299impl fmt::Display for GlobError {
300    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
301        write!(
302            f,
303            "attempting to read `{}` resulted in an error: {}",
304            self.path.display(),
305            self.error
306        )
307    }
308}
309
310fn is_dir(p: &Path) -> bool {
311    fs::metadata(p).map(|m| m.is_dir()).unwrap_or(false)
312}
313
314/// An alias for a glob iteration result.
315///
316/// This represents either a matched path or a glob iteration error,
317/// such as failing to read a particular directory's contents.
318pub type GlobResult = Result<PathBuf, GlobError>;
319
320impl Iterator for Paths {
321    type Item = GlobResult;
322
323    fn next(&mut self) -> Option<GlobResult> {
324        // the todo buffer hasn't been initialized yet, so it's done at this
325        // point rather than in glob() so that the errors are unified that is,
326        // failing to fill the buffer is an iteration error construction of the
327        // iterator (i.e. glob()) only fails if it fails to compile the Pattern
328        if let Some(scope) = self.scope.take() {
329            if !self.dir_patterns.is_empty() {
330                // Shouldn't happen, but we're using -1 as a special index.
331                assert!(self.dir_patterns.len() < !0 as usize);
332
333                fill_todo(&mut self.todo, &self.dir_patterns, 0, &scope, self.options);
334            }
335        }
336
337        loop {
338            if self.dir_patterns.is_empty() || self.todo.is_empty() {
339                return None;
340            }
341
342            let (path, mut idx) = match self.todo.pop().unwrap() {
343                Ok(pair) => pair,
344                Err(e) => return Some(Err(e)),
345            };
346
347            // idx -1: was already checked by fill_todo, maybe path was '.' or
348            // '..' that we can't match here because of normalization.
349            if idx == !0 as usize {
350                if self.require_dir && !is_dir(&path) {
351                    continue;
352                }
353                return Some(Ok(path));
354            }
355
356            if self.dir_patterns[idx].is_recursive {
357                let mut next = idx;
358
359                // collapse consecutive recursive patterns
360                while (next + 1) < self.dir_patterns.len()
361                    && self.dir_patterns[next + 1].is_recursive
362                {
363                    next += 1;
364                }
365
366                if is_dir(&path) {
367                    // the path is a directory, so it's a match
368
369                    // push this directory's contents
370                    fill_todo(
371                        &mut self.todo,
372                        &self.dir_patterns,
373                        next,
374                        &path,
375                        self.options,
376                    );
377
378                    if next == self.dir_patterns.len() - 1 {
379                        // pattern ends in recursive pattern, so return this
380                        // directory as a result
381                        return Some(Ok(path));
382                    } else {
383                        // advanced to the next pattern for this path
384                        idx = next + 1;
385                    }
386                } else if next == self.dir_patterns.len() - 1 {
387                    // not a directory and it's the last pattern, meaning no
388                    // match
389                    continue;
390                } else {
391                    // advanced to the next pattern for this path
392                    idx = next + 1;
393                }
394            }
395
396            // not recursive, so match normally
397            if self.dir_patterns[idx].matches_with(
398                {
399                    match path.file_name().and_then(|s| s.to_str()) {
400                        // FIXME (#9639): How do we handle non-utf8 filenames?
401                        // Ignore them for now; ideally we'd still match them
402                        // against a *
403                        None => continue,
404                        Some(x) => x,
405                    }
406                },
407                self.options,
408            ) {
409                if idx == self.dir_patterns.len() - 1 {
410                    // it is not possible for a pattern to match a directory
411                    // *AND* its children so we don't need to check the
412                    // children
413
414                    if !self.require_dir || is_dir(&path) {
415                        return Some(Ok(path));
416                    }
417                } else {
418                    fill_todo(
419                        &mut self.todo,
420                        &self.dir_patterns,
421                        idx + 1,
422                        &path,
423                        self.options,
424                    );
425                }
426            }
427        }
428    }
429}
430
431/// A pattern parsing error.
432#[derive(Debug)]
433#[allow(missing_copy_implementations)]
434pub struct PatternError {
435    /// The approximate character index of where the error occurred.
436    pub pos: usize,
437
438    /// A message describing the error.
439    pub msg: &'static str,
440}
441
442impl Error for PatternError {
443    fn description(&self) -> &str {
444        self.msg
445    }
446}
447
448impl fmt::Display for PatternError {
449    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
450        write!(
451            f,
452            "Pattern syntax error near position {}: {}",
453            self.pos, self.msg
454        )
455    }
456}
457
458/// A compiled Unix shell style pattern.
459///
460/// - `?` matches any single character.
461///
462/// - `*` matches any (possibly empty) sequence of characters.
463///
464/// - `**` matches the current directory and arbitrary subdirectories. This
465///   sequence **must** form a single path component, so both `**a` and `b**`
466///   are invalid and will result in an error.  A sequence of more than two
467///   consecutive `*` characters is also invalid.
468///
469/// - `[...]` matches any character inside the brackets.  Character sequences
470///   can also specify ranges of characters, as ordered by Unicode, so e.g.
471///   `[0-9]` specifies any character between 0 and 9 inclusive. An unclosed
472///   bracket is invalid.
473///
474/// - `[!...]` is the negation of `[...]`, i.e. it matches any characters
475///   **not** in the brackets.
476///
477/// - The metacharacters `?`, `*`, `[`, `]` can be matched by using brackets
478///   (e.g. `[?]`).  When a `]` occurs immediately following `[` or `[!` then it
479///   is interpreted as being part of, rather then ending, the character set, so
480///   `]` and NOT `]` can be matched by `[]]` and `[!]]` respectively.  The `-`
481///   character can be specified inside a character sequence pattern by placing
482///   it at the start or the end, e.g. `[abc-]`.
483#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)]
484pub struct Pattern {
485    original: String,
486    tokens: Vec<PatternToken>,
487    is_recursive: bool,
488}
489
490/// Show the original glob pattern.
491impl fmt::Display for Pattern {
492    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
493        self.original.fmt(f)
494    }
495}
496
497impl FromStr for Pattern {
498    type Err = PatternError;
499
500    fn from_str(s: &str) -> Result<Self, PatternError> {
501        Self::new(s)
502    }
503}
504
505#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
506enum PatternToken {
507    Char(char),
508    AnyChar,
509    AnySequence,
510    AnyRecursiveSequence,
511    AnyWithin(Vec<CharSpecifier>),
512    AnyExcept(Vec<CharSpecifier>),
513}
514
515#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
516enum CharSpecifier {
517    SingleChar(char),
518    CharRange(char, char),
519}
520
521#[derive(Copy, Clone, PartialEq)]
522enum MatchResult {
523    Match,
524    SubPatternDoesntMatch,
525    EntirePatternDoesntMatch,
526}
527
528const ERROR_WILDCARDS: &str = "wildcards are either regular `*` or recursive `**`";
529const ERROR_RECURSIVE_WILDCARDS: &str = "recursive wildcards must form a single path \
530                                         component";
531const ERROR_INVALID_RANGE: &str = "invalid range pattern";
532
533impl Pattern {
534    /// This function compiles Unix shell style patterns.
535    ///
536    /// An invalid glob pattern will yield a `PatternError`.
537    pub fn new(pattern: &str) -> Result<Self, PatternError> {
538        let chars = pattern.chars().collect::<Vec<_>>();
539        let mut tokens = Vec::new();
540        let mut is_recursive = false;
541        let mut i = 0;
542
543        while i < chars.len() {
544            match chars[i] {
545                '?' => {
546                    tokens.push(AnyChar);
547                    i += 1;
548                }
549                '*' => {
550                    let old = i;
551
552                    while i < chars.len() && chars[i] == '*' {
553                        i += 1;
554                    }
555
556                    let count = i - old;
557
558                    if count > 2 {
559                        return Err(PatternError {
560                            pos: old + 2,
561                            msg: ERROR_WILDCARDS,
562                        });
563                    } else if count == 2 {
564                        // ** can only be an entire path component
565                        // i.e. a/**/b is valid, but a**/b or a/**b is not
566                        // invalid matches are treated literally
567                        let is_valid = if i == 2 || path::is_separator(chars[i - count - 1]) {
568                            // it ends in a '/'
569                            if i < chars.len() && path::is_separator(chars[i]) {
570                                i += 1;
571                                true
572                            // or the pattern ends here
573                            // this enables the existing globbing mechanism
574                            } else if i == chars.len() {
575                                true
576                            // `**` ends in non-separator
577                            } else {
578                                return Err(PatternError {
579                                    pos: i,
580                                    msg: ERROR_RECURSIVE_WILDCARDS,
581                                });
582                            }
583                        // `**` begins with non-separator
584                        } else {
585                            return Err(PatternError {
586                                pos: old - 1,
587                                msg: ERROR_RECURSIVE_WILDCARDS,
588                            });
589                        };
590
591                        let tokens_len = tokens.len();
592
593                        if is_valid {
594                            // collapse consecutive AnyRecursiveSequence to a
595                            // single one
596                            if !(tokens_len > 1 && tokens[tokens_len - 1] == AnyRecursiveSequence) {
597                                is_recursive = true;
598                                tokens.push(AnyRecursiveSequence);
599                            }
600                        }
601                    } else {
602                        tokens.push(AnySequence);
603                    }
604                }
605                '[' => {
606                    if i + 4 <= chars.len() && chars[i + 1] == '!' {
607                        match chars[i + 3..].iter().position(|x| *x == ']') {
608                            None => (),
609                            Some(j) => {
610                                let chars = &chars[i + 2..i + 3 + j];
611                                let cs = parse_char_specifiers(chars);
612                                tokens.push(AnyExcept(cs));
613                                i += j + 4;
614                                continue;
615                            }
616                        }
617                    } else if i + 3 <= chars.len() && chars[i + 1] != '!' {
618                        match chars[i + 2..].iter().position(|x| *x == ']') {
619                            None => (),
620                            Some(j) => {
621                                let cs = parse_char_specifiers(&chars[i + 1..i + 2 + j]);
622                                tokens.push(AnyWithin(cs));
623                                i += j + 3;
624                                continue;
625                            }
626                        }
627                    }
628
629                    // if we get here then this is not a valid range pattern
630                    return Err(PatternError {
631                        pos: i,
632                        msg: ERROR_INVALID_RANGE,
633                    });
634                }
635                c => {
636                    tokens.push(Char(c));
637                    i += 1;
638                }
639            }
640        }
641
642        Ok(Self {
643            tokens,
644            original: pattern.to_string(),
645            is_recursive,
646        })
647    }
648
649    /// Escape metacharacters within the given string by surrounding them in
650    /// brackets. The resulting string will, when compiled into a `Pattern`,
651    /// match the input string and nothing else.
652    pub fn escape(s: &str) -> String {
653        let mut escaped = String::new();
654        for c in s.chars() {
655            match c {
656                // note that ! does not need escaping because it is only special
657                // inside brackets
658                '?' | '*' | '[' | ']' => {
659                    escaped.push('[');
660                    escaped.push(c);
661                    escaped.push(']');
662                }
663                c => {
664                    escaped.push(c);
665                }
666            }
667        }
668        escaped
669    }
670
671    /// Return if the given `str` matches this `Pattern` using the default
672    /// match options (i.e. `MatchOptions::new()`).
673    ///
674    /// # Examples
675    ///
676    /// ```rust
677    /// use glob::Pattern;
678    ///
679    /// assert!(Pattern::new("c?t").unwrap().matches("cat"));
680    /// assert!(Pattern::new("k[!e]tteh").unwrap().matches("kitteh"));
681    /// assert!(Pattern::new("d*g").unwrap().matches("doog"));
682    /// ```
683    pub fn matches(&self, str: &str) -> bool {
684        self.matches_with(str, MatchOptions::new())
685    }
686
687    /// Return if the given `Path`, when converted to a `str`, matches this
688    /// `Pattern` using the default match options (i.e. `MatchOptions::new()`).
689    pub fn matches_path(&self, path: &Path) -> bool {
690        // FIXME (#9639): This needs to handle non-utf8 paths
691        path.to_str().map_or(false, |s| self.matches(s))
692    }
693
694    /// Return if the given `str` matches this `Pattern` using the specified
695    /// match options.
696    pub fn matches_with(&self, str: &str, options: MatchOptions) -> bool {
697        self.matches_from(true, str.chars(), 0, options) == Match
698    }
699
700    /// Return if the given `Path`, when converted to a `str`, matches this
701    /// `Pattern` using the specified match options.
702    pub fn matches_path_with(&self, path: &Path, options: MatchOptions) -> bool {
703        // FIXME (#9639): This needs to handle non-utf8 paths
704        path.to_str()
705            .map_or(false, |s| self.matches_with(s, options))
706    }
707
708    /// Access the original glob pattern.
709    pub fn as_str(&self) -> &str {
710        &self.original
711    }
712
713    fn matches_from(
714        &self,
715        mut follows_separator: bool,
716        mut file: std::str::Chars,
717        i: usize,
718        options: MatchOptions,
719    ) -> MatchResult {
720        for (ti, token) in self.tokens[i..].iter().enumerate() {
721            match *token {
722                AnySequence | AnyRecursiveSequence => {
723                    // ** must be at the start.
724                    debug_assert!(match *token {
725                        AnyRecursiveSequence => follows_separator,
726                        _ => true,
727                    });
728
729                    // Empty match
730                    match self.matches_from(follows_separator, file.clone(), i + ti + 1, options) {
731                        SubPatternDoesntMatch => (), // keep trying
732                        m => return m,
733                    };
734
735                    while let Some(c) = file.next() {
736                        if follows_separator && options.require_literal_leading_dot && c == '.' {
737                            return SubPatternDoesntMatch;
738                        }
739                        follows_separator = path::is_separator(c);
740                        match *token {
741                            AnyRecursiveSequence if !follows_separator => continue,
742                            AnySequence
743                                if options.require_literal_separator && follows_separator =>
744                            {
745                                return SubPatternDoesntMatch
746                            }
747                            _ => (),
748                        }
749                        match self.matches_from(
750                            follows_separator,
751                            file.clone(),
752                            i + ti + 1,
753                            options,
754                        ) {
755                            SubPatternDoesntMatch => (), // keep trying
756                            m => return m,
757                        }
758                    }
759                }
760                _ => {
761                    let c = match file.next() {
762                        Some(c) => c,
763                        None => return EntirePatternDoesntMatch,
764                    };
765
766                    let is_sep = path::is_separator(c);
767
768                    if !match *token {
769                        AnyChar | AnyWithin(..) | AnyExcept(..)
770                            if (options.require_literal_separator && is_sep)
771                                || (follows_separator
772                                    && options.require_literal_leading_dot
773                                    && c == '.') =>
774                        {
775                            false
776                        }
777                        AnyChar => true,
778                        AnyWithin(ref specifiers) => in_char_specifiers(&specifiers, c, options),
779                        AnyExcept(ref specifiers) => !in_char_specifiers(&specifiers, c, options),
780                        Char(c2) => chars_eq(c, c2, options.case_sensitive),
781                        AnySequence | AnyRecursiveSequence => unreachable!(),
782                    } {
783                        return SubPatternDoesntMatch;
784                    }
785                    follows_separator = is_sep;
786                }
787            }
788        }
789
790        // Iter is fused.
791        if file.next().is_none() {
792            Match
793        } else {
794            SubPatternDoesntMatch
795        }
796    }
797}
798
799// Fills `todo` with paths under `path` to be matched by `patterns[idx]`,
800// special-casing patterns to match `.` and `..`, and avoiding `readdir()`
801// calls when there are no metacharacters in the pattern.
802fn fill_todo(
803    todo: &mut Vec<Result<(PathBuf, usize), GlobError>>,
804    patterns: &[Pattern],
805    idx: usize,
806    path: &Path,
807    options: MatchOptions,
808) {
809    // convert a pattern that's just many Char(_) to a string
810    fn pattern_as_str(pattern: &Pattern) -> Option<String> {
811        let mut s = String::new();
812        for token in &pattern.tokens {
813            match *token {
814                Char(c) => s.push(c),
815                _ => return None,
816            }
817        }
818
819        Some(s)
820    }
821
822    let add = |todo: &mut Vec<_>, next_path: PathBuf| {
823        if idx + 1 == patterns.len() {
824            // We know it's good, so don't make the iterator match this path
825            // against the pattern again. In particular, it can't match
826            // . or .. globs since these never show up as path components.
827            todo.push(Ok((next_path, !0 as usize)));
828        } else {
829            fill_todo(todo, patterns, idx + 1, &next_path, options);
830        }
831    };
832
833    let pattern = &patterns[idx];
834    let is_dir = is_dir(path);
835    let curdir = path == Path::new(".");
836    match pattern_as_str(pattern) {
837        Some(s) => {
838            // This pattern component doesn't have any metacharacters, so we
839            // don't need to read the current directory to know where to
840            // continue. So instead of passing control back to the iterator,
841            // we can just check for that one entry and potentially recurse
842            // right away.
843            let special = "." == s || ".." == s;
844            let next_path = if curdir {
845                PathBuf::from(s)
846            } else {
847                path.join(&s)
848            };
849            if (special && is_dir) || (!special && fs::metadata(&next_path).is_ok()) {
850                add(todo, next_path);
851            }
852        }
853        None if is_dir => {
854            let dirs = fs::read_dir(path).and_then(|d| {
855                d.map(|e| {
856                    e.map(|e| {
857                        if curdir {
858                            PathBuf::from(e.path().file_name().unwrap())
859                        } else {
860                            e.path()
861                        }
862                    })
863                })
864                .collect::<Result<Vec<_>, _>>()
865            });
866            match dirs {
867                Ok(mut children) => {
868                    children.sort_by(|p1, p2| p2.file_name().cmp(&p1.file_name()));
869                    todo.extend(children.into_iter().map(|x| Ok((x, idx))));
870
871                    // Matching the special directory entries . and .. that
872                    // refer to the current and parent directory respectively
873                    // requires that the pattern has a leading dot, even if the
874                    // `MatchOptions` field `require_literal_leading_dot` is not
875                    // set.
876                    if !pattern.tokens.is_empty() && pattern.tokens[0] == Char('.') {
877                        for &special in &[".", ".."] {
878                            if pattern.matches_with(special, options) {
879                                add(todo, path.join(special));
880                            }
881                        }
882                    }
883                }
884                Err(e) => {
885                    todo.push(Err(GlobError {
886                        path: path.to_path_buf(),
887                        error: e,
888                    }));
889                }
890            }
891        }
892        None => {
893            // not a directory, nothing more to find
894        }
895    }
896}
897
898fn parse_char_specifiers(s: &[char]) -> Vec<CharSpecifier> {
899    let mut cs = Vec::new();
900    let mut i = 0;
901    while i < s.len() {
902        if i + 3 <= s.len() && s[i + 1] == '-' {
903            cs.push(CharRange(s[i], s[i + 2]));
904            i += 3;
905        } else {
906            cs.push(SingleChar(s[i]));
907            i += 1;
908        }
909    }
910    cs
911}
912
913fn in_char_specifiers(specifiers: &[CharSpecifier], c: char, options: MatchOptions) -> bool {
914    for &specifier in specifiers.iter() {
915        match specifier {
916            SingleChar(sc) => {
917                if chars_eq(c, sc, options.case_sensitive) {
918                    return true;
919                }
920            }
921            CharRange(start, end) => {
922                // FIXME: work with non-ascii chars properly (issue #1347)
923                if !options.case_sensitive && c.is_ascii() && start.is_ascii() && end.is_ascii() {
924                    let start = start.to_ascii_lowercase();
925                    let end = end.to_ascii_lowercase();
926
927                    let start_up = start.to_uppercase().next().unwrap();
928                    let end_up = end.to_uppercase().next().unwrap();
929
930                    // only allow case insensitive matching when
931                    // both start and end are within a-z or A-Z
932                    if start != start_up && end != end_up {
933                        let c = c.to_ascii_lowercase();
934                        if c >= start && c <= end {
935                            return true;
936                        }
937                    }
938                }
939
940                if c >= start && c <= end {
941                    return true;
942                }
943            }
944        }
945    }
946
947    false
948}
949
950/// A helper function to determine if two chars are (possibly case-insensitively) equal.
951fn chars_eq(a: char, b: char, case_sensitive: bool) -> bool {
952    if cfg!(windows) && path::is_separator(a) && path::is_separator(b) {
953        true
954    } else if !case_sensitive && a.is_ascii() && b.is_ascii() {
955        // FIXME: work with non-ascii chars properly (issue #9084)
956        a.to_ascii_lowercase() == b.to_ascii_lowercase()
957    } else {
958        a == b
959    }
960}
961
962/// Configuration options to modify the behaviour of `Pattern::matches_with(..)`.
963#[allow(missing_copy_implementations)]
964#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
965pub struct MatchOptions {
966    /// Whether or not patterns should be matched in a case-sensitive manner.
967    /// This currently only considers upper/lower case relationships between
968    /// ASCII characters, but in future this might be extended to work with
969    /// Unicode.
970    pub case_sensitive: bool,
971
972    /// Whether or not path-component separator characters (e.g. `/` on
973    /// Posix) must be matched by a literal `/`, rather than by `*` or `?` or
974    /// `[...]`.
975    pub require_literal_separator: bool,
976
977    /// Whether or not paths that contain components that start with a `.`
978    /// will require that `.` appears literally in the pattern; `*`, `?`, `**`,
979    /// or `[...]` will not match. This is useful because such files are
980    /// conventionally considered hidden on Unix systems and it might be
981    /// desirable to skip them when listing files.
982    pub require_literal_leading_dot: bool,
983}
984
985impl MatchOptions {
986    /// Constructs a new `MatchOptions` with default field values. This is used
987    /// when calling functions that do not take an explicit `MatchOptions`
988    /// parameter.
989    ///
990    /// This function always returns this value:
991    ///
992    /// ```rust,ignore
993    /// MatchOptions {
994    ///     case_sensitive: true,
995    ///     require_literal_separator: false,
996    ///     require_literal_leading_dot: false
997    /// }
998    /// ```
999    pub fn new() -> Self {
1000        Self {
1001            case_sensitive: true,
1002            require_literal_separator: false,
1003            require_literal_leading_dot: false,
1004        }
1005    }
1006}
1007
1008#[cfg(test)]
1009mod test {
1010    use super::{glob, MatchOptions, Pattern};
1011    use std::path::Path;
1012
1013    #[test]
1014    fn test_pattern_from_str() {
1015        assert!("a*b".parse::<Pattern>().unwrap().matches("a_b"));
1016        assert!("a/**b".parse::<Pattern>().unwrap_err().pos == 4);
1017    }
1018
1019    #[test]
1020    fn test_wildcard_errors() {
1021        assert!(Pattern::new("a/**b").unwrap_err().pos == 4);
1022        assert!(Pattern::new("a/bc**").unwrap_err().pos == 3);
1023        assert!(Pattern::new("a/*****").unwrap_err().pos == 4);
1024        assert!(Pattern::new("a/b**c**d").unwrap_err().pos == 2);
1025        assert!(Pattern::new("a**b").unwrap_err().pos == 0);
1026    }
1027
1028    #[test]
1029    fn test_unclosed_bracket_errors() {
1030        assert!(Pattern::new("abc[def").unwrap_err().pos == 3);
1031        assert!(Pattern::new("abc[!def").unwrap_err().pos == 3);
1032        assert!(Pattern::new("abc[").unwrap_err().pos == 3);
1033        assert!(Pattern::new("abc[!").unwrap_err().pos == 3);
1034        assert!(Pattern::new("abc[d").unwrap_err().pos == 3);
1035        assert!(Pattern::new("abc[!d").unwrap_err().pos == 3);
1036        assert!(Pattern::new("abc[]").unwrap_err().pos == 3);
1037        assert!(Pattern::new("abc[!]").unwrap_err().pos == 3);
1038    }
1039
1040    #[test]
1041    fn test_glob_errors() {
1042        assert!(glob("a/**b").err().unwrap().pos == 4);
1043        assert!(glob("abc[def").err().unwrap().pos == 3);
1044    }
1045
1046    // this test assumes that there is a /root directory and that
1047    // the user running this test is not root or otherwise doesn't
1048    // have permission to read its contents
1049    #[cfg(all(unix, not(target_os = "macos")))]
1050    #[test]
1051    fn test_iteration_errors() {
1052        use std::io;
1053        let mut iter = glob("/root/*").unwrap();
1054
1055        // GlobErrors shouldn't halt iteration
1056        let next = iter.next();
1057        assert!(next.is_some());
1058
1059        let err = next.unwrap();
1060        assert!(err.is_err());
1061
1062        let err = err.err().unwrap();
1063        assert!(err.path() == Path::new("/root"));
1064        assert!(err.error().kind() == io::ErrorKind::PermissionDenied);
1065    }
1066
1067    #[test]
1068    fn test_absolute_pattern() {
1069        assert!(glob("/").unwrap().next().is_some());
1070        assert!(glob("//").unwrap().next().is_some());
1071
1072        // assume that the filesystem is not empty!
1073        assert!(glob("/*").unwrap().next().is_some());
1074
1075        #[cfg(not(windows))]
1076        fn win() {}
1077
1078        #[cfg(windows)]
1079        fn win() {
1080            use std::env::current_dir;
1081            use std::ffi::AsOsStr;
1082
1083            // check windows absolute paths with host/device components
1084            let root_with_device = current_dir()
1085                .ok()
1086                .and_then(|p| p.prefix().map(|p| p.join("*")))
1087                .unwrap();
1088            // FIXME (#9639): This needs to handle non-utf8 paths
1089            assert!(glob(root_with_device.as_os_str().to_str().unwrap())
1090                .unwrap()
1091                .next()
1092                .is_some());
1093        }
1094        win()
1095    }
1096
1097    #[test]
1098    fn test_wildcards() {
1099        assert!(Pattern::new("a*b").unwrap().matches("a_b"));
1100        assert!(Pattern::new("a*b*c").unwrap().matches("abc"));
1101        assert!(!Pattern::new("a*b*c").unwrap().matches("abcd"));
1102        assert!(Pattern::new("a*b*c").unwrap().matches("a_b_c"));
1103        assert!(Pattern::new("a*b*c").unwrap().matches("a___b___c"));
1104        assert!(Pattern::new("abc*abc*abc")
1105            .unwrap()
1106            .matches("abcabcabcabcabcabcabc"));
1107        assert!(!Pattern::new("abc*abc*abc")
1108            .unwrap()
1109            .matches("abcabcabcabcabcabcabca"));
1110        assert!(Pattern::new("a*a*a*a*a*a*a*a*a")
1111            .unwrap()
1112            .matches("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"));
1113        assert!(Pattern::new("a*b[xyz]c*d").unwrap().matches("abxcdbxcddd"));
1114    }
1115
1116    #[test]
1117    fn test_recursive_wildcards() {
1118        let pat = Pattern::new("some/**/needle.txt").unwrap();
1119        assert!(pat.matches("some/needle.txt"));
1120        assert!(pat.matches("some/one/needle.txt"));
1121        assert!(pat.matches("some/one/two/needle.txt"));
1122        assert!(pat.matches("some/other/needle.txt"));
1123        assert!(!pat.matches("some/other/notthis.txt"));
1124
1125        // a single ** should be valid, for globs
1126        // Should accept anything
1127        let pat = Pattern::new("**").unwrap();
1128        assert!(pat.is_recursive);
1129        assert!(pat.matches("abcde"));
1130        assert!(pat.matches(""));
1131        assert!(pat.matches(".asdf"));
1132        assert!(pat.matches("/x/.asdf"));
1133
1134        // collapse consecutive wildcards
1135        let pat = Pattern::new("some/**/**/needle.txt").unwrap();
1136        assert!(pat.matches("some/needle.txt"));
1137        assert!(pat.matches("some/one/needle.txt"));
1138        assert!(pat.matches("some/one/two/needle.txt"));
1139        assert!(pat.matches("some/other/needle.txt"));
1140        assert!(!pat.matches("some/other/notthis.txt"));
1141
1142        // ** can begin the pattern
1143        let pat = Pattern::new("**/test").unwrap();
1144        assert!(pat.matches("one/two/test"));
1145        assert!(pat.matches("one/test"));
1146        assert!(pat.matches("test"));
1147
1148        // /** can begin the pattern
1149        let pat = Pattern::new("/**/test").unwrap();
1150        assert!(pat.matches("/one/two/test"));
1151        assert!(pat.matches("/one/test"));
1152        assert!(pat.matches("/test"));
1153        assert!(!pat.matches("/one/notthis"));
1154        assert!(!pat.matches("/notthis"));
1155
1156        // Only start sub-patterns on start of path segment.
1157        let pat = Pattern::new("**/.*").unwrap();
1158        assert!(pat.matches(".abc"));
1159        assert!(pat.matches("abc/.abc"));
1160        assert!(!pat.matches("ab.c"));
1161        assert!(!pat.matches("abc/ab.c"));
1162    }
1163
1164    #[test]
1165    fn test_lots_of_files() {
1166        // this is a good test because it touches lots of differently named files
1167        glob("/*/*/*/*").unwrap().skip(10000).next();
1168    }
1169
1170    #[test]
1171    fn test_range_pattern() {
1172        let pat = Pattern::new("a[0-9]b").unwrap();
1173        for i in 0..10 {
1174            assert!(pat.matches(&format!("a{}b", i)));
1175        }
1176        assert!(!pat.matches("a_b"));
1177
1178        let pat = Pattern::new("a[!0-9]b").unwrap();
1179        for i in 0..10 {
1180            assert!(!pat.matches(&format!("a{}b", i)));
1181        }
1182        assert!(pat.matches("a_b"));
1183
1184        let pats = ["[a-z123]", "[1a-z23]", "[123a-z]"];
1185        for &p in pats.iter() {
1186            let pat = Pattern::new(p).unwrap();
1187            for c in "abcdefghijklmnopqrstuvwxyz".chars() {
1188                assert!(pat.matches(&c.to_string()));
1189            }
1190            for c in "ABCDEFGHIJKLMNOPQRSTUVWXYZ".chars() {
1191                let options = MatchOptions {
1192                    case_sensitive: false,
1193                    ..MatchOptions::new()
1194                };
1195                assert!(pat.matches_with(&c.to_string(), options));
1196            }
1197            assert!(pat.matches("1"));
1198            assert!(pat.matches("2"));
1199            assert!(pat.matches("3"));
1200        }
1201
1202        let pats = ["[abc-]", "[-abc]", "[a-c-]"];
1203        for &p in pats.iter() {
1204            let pat = Pattern::new(p).unwrap();
1205            assert!(pat.matches("a"));
1206            assert!(pat.matches("b"));
1207            assert!(pat.matches("c"));
1208            assert!(pat.matches("-"));
1209            assert!(!pat.matches("d"));
1210        }
1211
1212        let pat = Pattern::new("[2-1]").unwrap();
1213        assert!(!pat.matches("1"));
1214        assert!(!pat.matches("2"));
1215
1216        assert!(Pattern::new("[-]").unwrap().matches("-"));
1217        assert!(!Pattern::new("[!-]").unwrap().matches("-"));
1218    }
1219
1220    #[test]
1221    fn test_pattern_matches() {
1222        let txt_pat = Pattern::new("*hello.txt").unwrap();
1223        assert!(txt_pat.matches("hello.txt"));
1224        assert!(txt_pat.matches("gareth_says_hello.txt"));
1225        assert!(txt_pat.matches("some/path/to/hello.txt"));
1226        assert!(txt_pat.matches("some\\path\\to\\hello.txt"));
1227        assert!(txt_pat.matches("/an/absolute/path/to/hello.txt"));
1228        assert!(!txt_pat.matches("hello.txt-and-then-some"));
1229        assert!(!txt_pat.matches("goodbye.txt"));
1230
1231        let dir_pat = Pattern::new("*some/path/to/hello.txt").unwrap();
1232        assert!(dir_pat.matches("some/path/to/hello.txt"));
1233        assert!(dir_pat.matches("a/bigger/some/path/to/hello.txt"));
1234        assert!(!dir_pat.matches("some/path/to/hello.txt-and-then-some"));
1235        assert!(!dir_pat.matches("some/other/path/to/hello.txt"));
1236    }
1237
1238    #[test]
1239    fn test_pattern_escape() {
1240        let s = "_[_]_?_*_!_";
1241        assert_eq!(Pattern::escape(s), "_[[]_[]]_[?]_[*]_!_".to_string());
1242        assert!(Pattern::new(&Pattern::escape(s)).unwrap().matches(s));
1243    }
1244
1245    #[test]
1246    fn test_pattern_matches_case_insensitive() {
1247        let pat = Pattern::new("aBcDeFg").unwrap();
1248        let options = MatchOptions {
1249            case_sensitive: false,
1250            require_literal_separator: false,
1251            require_literal_leading_dot: false,
1252        };
1253
1254        assert!(pat.matches_with("aBcDeFg", options));
1255        assert!(pat.matches_with("abcdefg", options));
1256        assert!(pat.matches_with("ABCDEFG", options));
1257        assert!(pat.matches_with("AbCdEfG", options));
1258    }
1259
1260    #[test]
1261    fn test_pattern_matches_case_insensitive_range() {
1262        let pat_within = Pattern::new("[a]").unwrap();
1263        let pat_except = Pattern::new("[!a]").unwrap();
1264
1265        let options_case_insensitive = MatchOptions {
1266            case_sensitive: false,
1267            require_literal_separator: false,
1268            require_literal_leading_dot: false,
1269        };
1270        let options_case_sensitive = MatchOptions {
1271            case_sensitive: true,
1272            require_literal_separator: false,
1273            require_literal_leading_dot: false,
1274        };
1275
1276        assert!(pat_within.matches_with("a", options_case_insensitive));
1277        assert!(pat_within.matches_with("A", options_case_insensitive));
1278        assert!(!pat_within.matches_with("A", options_case_sensitive));
1279
1280        assert!(!pat_except.matches_with("a", options_case_insensitive));
1281        assert!(!pat_except.matches_with("A", options_case_insensitive));
1282        assert!(pat_except.matches_with("A", options_case_sensitive));
1283    }
1284
1285    #[test]
1286    fn test_pattern_matches_require_literal_separator() {
1287        let options_require_literal = MatchOptions {
1288            case_sensitive: true,
1289            require_literal_separator: true,
1290            require_literal_leading_dot: false,
1291        };
1292        let options_not_require_literal = MatchOptions {
1293            case_sensitive: true,
1294            require_literal_separator: false,
1295            require_literal_leading_dot: false,
1296        };
1297
1298        assert!(Pattern::new("abc/def")
1299            .unwrap()
1300            .matches_with("abc/def", options_require_literal));
1301        assert!(!Pattern::new("abc?def")
1302            .unwrap()
1303            .matches_with("abc/def", options_require_literal));
1304        assert!(!Pattern::new("abc*def")
1305            .unwrap()
1306            .matches_with("abc/def", options_require_literal));
1307        assert!(!Pattern::new("abc[/]def")
1308            .unwrap()
1309            .matches_with("abc/def", options_require_literal));
1310
1311        assert!(Pattern::new("abc/def")
1312            .unwrap()
1313            .matches_with("abc/def", options_not_require_literal));
1314        assert!(Pattern::new("abc?def")
1315            .unwrap()
1316            .matches_with("abc/def", options_not_require_literal));
1317        assert!(Pattern::new("abc*def")
1318            .unwrap()
1319            .matches_with("abc/def", options_not_require_literal));
1320        assert!(Pattern::new("abc[/]def")
1321            .unwrap()
1322            .matches_with("abc/def", options_not_require_literal));
1323    }
1324
1325    #[test]
1326    fn test_pattern_matches_require_literal_leading_dot() {
1327        let options_require_literal_leading_dot = MatchOptions {
1328            case_sensitive: true,
1329            require_literal_separator: false,
1330            require_literal_leading_dot: true,
1331        };
1332        let options_not_require_literal_leading_dot = MatchOptions {
1333            case_sensitive: true,
1334            require_literal_separator: false,
1335            require_literal_leading_dot: false,
1336        };
1337
1338        let f = |options| {
1339            Pattern::new("*.txt")
1340                .unwrap()
1341                .matches_with(".hello.txt", options)
1342        };
1343        assert!(f(options_not_require_literal_leading_dot));
1344        assert!(!f(options_require_literal_leading_dot));
1345
1346        let f = |options| {
1347            Pattern::new(".*.*")
1348                .unwrap()
1349                .matches_with(".hello.txt", options)
1350        };
1351        assert!(f(options_not_require_literal_leading_dot));
1352        assert!(f(options_require_literal_leading_dot));
1353
1354        let f = |options| {
1355            Pattern::new("aaa/bbb/*")
1356                .unwrap()
1357                .matches_with("aaa/bbb/.ccc", options)
1358        };
1359        assert!(f(options_not_require_literal_leading_dot));
1360        assert!(!f(options_require_literal_leading_dot));
1361
1362        let f = |options| {
1363            Pattern::new("aaa/bbb/*")
1364                .unwrap()
1365                .matches_with("aaa/bbb/c.c.c.", options)
1366        };
1367        assert!(f(options_not_require_literal_leading_dot));
1368        assert!(f(options_require_literal_leading_dot));
1369
1370        let f = |options| {
1371            Pattern::new("aaa/bbb/.*")
1372                .unwrap()
1373                .matches_with("aaa/bbb/.ccc", options)
1374        };
1375        assert!(f(options_not_require_literal_leading_dot));
1376        assert!(f(options_require_literal_leading_dot));
1377
1378        let f = |options| {
1379            Pattern::new("aaa/?bbb")
1380                .unwrap()
1381                .matches_with("aaa/.bbb", options)
1382        };
1383        assert!(f(options_not_require_literal_leading_dot));
1384        assert!(!f(options_require_literal_leading_dot));
1385
1386        let f = |options| {
1387            Pattern::new("aaa/[.]bbb")
1388                .unwrap()
1389                .matches_with("aaa/.bbb", options)
1390        };
1391        assert!(f(options_not_require_literal_leading_dot));
1392        assert!(!f(options_require_literal_leading_dot));
1393
1394        let f = |options| Pattern::new("**/*").unwrap().matches_with(".bbb", options);
1395        assert!(f(options_not_require_literal_leading_dot));
1396        assert!(!f(options_require_literal_leading_dot));
1397    }
1398
1399    #[test]
1400    fn test_matches_path() {
1401        // on windows, (Path::new("a/b").as_str().unwrap() == "a\\b"), so this
1402        // tests that / and \ are considered equivalent on windows
1403        assert!(Pattern::new("a/b").unwrap().matches_path(&Path::new("a/b")));
1404    }
1405
1406    #[test]
1407    fn test_path_join() {
1408        let pattern = Path::new("one").join(&Path::new("**/*.rs"));
1409        assert!(Pattern::new(pattern.to_str().unwrap()).is_ok());
1410    }
1411}