clap/
osstringext.rs

1use std::ffi::OsStr;
2#[cfg(not(any(target_os = "windows", target_arch = "wasm32")))]
3use std::os::unix::ffi::OsStrExt;
4#[cfg(any(target_os = "windows", target_arch = "wasm32"))]
5use crate::INVALID_UTF8;
6
/// Byte-level access to an `OsStr` on targets where
/// `std::os::unix::ffi::OsStrExt` is not imported by this module.
///
/// Only compiled on Windows and wasm32. It supplies `as_bytes` and
/// `from_bytes` so the rest of this module can call them on every target.
#[cfg(any(target_os = "windows", target_arch = "wasm32"))]
pub trait OsStrExt3 {
    /// Borrows the contents of `self` as a byte slice.
    fn as_bytes(&self) -> &[u8];

    /// Builds a borrowed `Self` from the bytes in `b`.
    fn from_bytes(b: &[u8]) -> &Self;
}
12
13#[doc(hidden)]
14pub trait OsStrExt2 {
15    fn starts_with(&self, s: &[u8]) -> bool;
16    fn split_at_byte(&self, b: u8) -> (&OsStr, &OsStr);
17    fn split_at(&self, i: usize) -> (&OsStr, &OsStr);
18    fn trim_left_matches(&self, b: u8) -> &OsStr;
19    fn contains_byte(&self, b: u8) -> bool;
20    fn split(&self, b: u8) -> OsSplit;
21}
22
/// Reports whether `osstr` begins with `prefix`, without panicking when the
/// `OsStr` contains invalid Unicode.
///
/// A Windows `OsStr` is usually UTF-16. When `prefix` is valid UTF-8 it is
/// re-encoded as UTF-16 and compared, code unit by code unit, against the
/// front of `osstr`. When `prefix` is not valid UTF-8 the comparison is not
/// meaningful, so the answer is `false`.
///
/// An empty `prefix` matches every `osstr`.
#[cfg(target_os = "windows")]
fn windows_osstr_starts_with(osstr: &OsStr, prefix: &[u8]) -> bool {
    use std::os::windows::ffi::OsStrExt;

    let prefix_str = match std::str::from_utf8(prefix) {
        Ok(valid) => valid,
        Err(_) => return false,
    };

    // Walk the prefix; each of its code units must equal the next unit taken
    // from the front of `osstr`. If `osstr` runs out first, `next()` yields
    // `None` and the comparison fails.
    let mut osstr_units = osstr.encode_wide();
    prefix_str
        .encode_utf16()
        .all(|expected| osstr_units.next() == Some(expected))
}
51
#[test]
#[cfg(target_os = "windows")]
fn test_windows_osstr_starts_with() {
    use std::ffi::OsString;
    use std::os::windows::ffi::OsStringExt;

    // Widens each input byte to one UTF-16 code unit.
    fn widen(bytes: &[u8]) -> OsString {
        let units: Vec<u16> = bytes.iter().map(|&byte| u16::from(byte)).collect();
        OsString::from_wide(&units)
    }

    // Runs the function under test against a widened haystack.
    fn starts(haystack: &[u8], prefix: &[u8]) -> bool {
        windows_osstr_starts_with(&widen(haystack), prefix)
    }

    // Basic cases: proper prefix, whole string, non-prefix substring, and a
    // candidate longer than the string.
    assert!(starts(b"abcdef", b"abc"));
    assert!(starts(b"abcdef", b"abcdef"));
    assert!(!starts(b"abcdef", b"def"));
    assert!(!starts(b"abc", b"abcd"));

    // The candidate prefix is not valid UTF-8: a standalone \xff byte is not
    // a valid UTF-8 sequence. Although the widened haystack holds the code
    // unit 0x00FF, the two do not match.
    assert!(!starts(b"\xff", b"\xff"));

    // The OsString is not valid UTF-16. It should still be possible to match
    // the valid characters at the front.
    //
    // UTF-16 surrogates are only valid in pairs, so a single one at the end
    // makes the whole string invalid UTF-16.
    let lone_surrogate: u16 = 0xDC00;
    let units = [
        u16::from(b'a'),
        u16::from(b'b'),
        u16::from(b'c'),
        lone_surrogate,
    ];
    let invalid_unicode = OsString::from_wide(&units);
    assert!(
        invalid_unicode.to_str().is_none(),
        "This string is invalid Unicode, and conversion to &str should fail.",
    );
    assert!(windows_osstr_starts_with(&invalid_unicode, b"abc"));
    assert!(!windows_osstr_starts_with(&invalid_unicode, b"abcd"));
}
89
/// `OsStrExt3` for `OsStr`, compiled only on Windows and wasm32.
#[cfg(any(target_os = "windows", target_arch = "wasm32"))]
impl OsStrExt3 for OsStr {
    /// Reinterprets `b` as an `OsStr` without copying or validating it.
    fn from_bytes(b: &[u8]) -> &Self {
        // NOTE(review): this reinterprets the slice reference as `&OsStr`,
        // which assumes the two reference types share a representation on
        // these targets — confirm against the std version in use. The bytes
        // are not checked in any way.
        unsafe { std::mem::transmute::<&[u8], &OsStr>(b) }
    }

    /// Returns the UTF-8 bytes of `self`.
    ///
    /// # Panics
    ///
    /// Panics with `INVALID_UTF8` when `self` cannot be viewed as a `&str`.
    fn as_bytes(&self) -> &[u8] {
        self.to_str().expect(INVALID_UTF8).as_bytes()
    }
}
100
101impl OsStrExt2 for OsStr {
102    fn starts_with(&self, s: &[u8]) -> bool {
103        #[cfg(target_os = "windows")]
104        {
105            // On Windows, the as_bytes() method will panic if the OsStr
106            // contains invalid Unicode. To avoid this, we use a
107            // Windows-specific starts-with function that doesn't rely on
108            // as_bytes(). This is necessary for Windows command line
109            // applications to handle non-Unicode arguments successfully. This
110            // allows common cases like `clap.exe [invalid]` to succeed, though
111            // cases that require string splitting will still fail, like
112            // `clap.exe --arg=[invalid]`. Note that this entire module is
113            // replaced in Clap 3.x, so this workaround is specific to the 2.x
114            // branch.
115            windows_osstr_starts_with(self, s)
116        }
117        #[cfg(not(target_os = "windows"))]
118        {
119            self.as_bytes().starts_with(s)
120        }
121    }
122
123    fn contains_byte(&self, byte: u8) -> bool {
124        for b in self.as_bytes() {
125            if b == &byte {
126                return true;
127            }
128        }
129        false
130    }
131
132    fn split_at_byte(&self, byte: u8) -> (&OsStr, &OsStr) {
133        for (i, b) in self.as_bytes().iter().enumerate() {
134            if b == &byte {
135                return (
136                    OsStr::from_bytes(&self.as_bytes()[..i]),
137                    OsStr::from_bytes(&self.as_bytes()[i + 1..]),
138                );
139            }
140        }
141        (
142            &*self,
143            OsStr::from_bytes(&self.as_bytes()[self.len()..self.len()]),
144        )
145    }
146
147    fn trim_left_matches(&self, byte: u8) -> &OsStr {
148        let mut found = false;
149        for (i, b) in self.as_bytes().iter().enumerate() {
150            if b != &byte {
151                return OsStr::from_bytes(&self.as_bytes()[i..]);
152            } else {
153                found = true;
154            }
155        }
156        if found {
157            return OsStr::from_bytes(&self.as_bytes()[self.len()..]);
158        }
159        &*self
160    }
161
162    fn split_at(&self, i: usize) -> (&OsStr, &OsStr) {
163        (
164            OsStr::from_bytes(&self.as_bytes()[..i]),
165            OsStr::from_bytes(&self.as_bytes()[i..]),
166        )
167    }
168
169    fn split(&self, b: u8) -> OsSplit {
170        OsSplit {
171            sep: b,
172            val: self.as_bytes(),
173            pos: 0,
174        }
175    }
176}
177
178#[doc(hidden)]
179#[derive(Clone, Debug)]
180pub struct OsSplit<'a> {
181    sep: u8,
182    val: &'a [u8],
183    pos: usize,
184}
185
186impl<'a> Iterator for OsSplit<'a> {
187    type Item = &'a OsStr;
188
189    fn next(&mut self) -> Option<&'a OsStr> {
190        debugln!("OsSplit::next: self={:?}", self);
191        if self.pos == self.val.len() {
192            return None;
193        }
194        let start = self.pos;
195        for b in &self.val[start..] {
196            self.pos += 1;
197            if *b == self.sep {
198                return Some(OsStr::from_bytes(&self.val[start..self.pos - 1]));
199            }
200        }
201        Some(OsStr::from_bytes(&self.val[start..]))
202    }
203}