uuid/
parser.rs

1// Copyright 2013-2014 The Rust Project Developers.
2// Copyright 2018 The Uuid Project Developers.
3//
4// See the COPYRIGHT file at the top-level directory of this distribution.
5//
6// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
7// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
8// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
9// option. This file may not be copied, modified, or distributed
10// except according to those terms.
11
12//! [`Uuid`] parsing constructs and utilities.
13//!
14//! [`Uuid`]: ../struct.Uuid.html
15
16use crate::{
17    error::*,
18    std::{convert::TryFrom, str},
19    Uuid,
20};
21
22impl str::FromStr for Uuid {
23    type Err = Error;
24
25    fn from_str(uuid_str: &str) -> Result<Self, Self::Err> {
26        Uuid::parse_str(uuid_str)
27    }
28}
29
30impl TryFrom<&'_ str> for Uuid {
31    type Error = Error;
32
33    fn try_from(uuid_str: &'_ str) -> Result<Self, Self::Error> {
34        Uuid::parse_str(uuid_str)
35    }
36}
37
38impl Uuid {
39    /// Parses a `Uuid` from a string of hexadecimal digits with optional
40    /// hyphens.
41    ///
42    /// Any of the formats generated by this module (simple, hyphenated, urn,
43    /// Microsoft GUID) are supported by this parsing function.
44    ///
45    /// Prefer [`try_parse`] unless you need detailed user-facing diagnostics.
46    /// This method will be eventually deprecated in favor of `try_parse`.
47    ///
48    /// # Examples
49    ///
50    /// Parse a hyphenated UUID:
51    ///
52    /// ```
53    /// # use uuid::{Uuid, Version, Variant};
54    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
55    /// let uuid = Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000")?;
56    ///
57    /// assert_eq!(Some(Version::Random), uuid.get_version());
58    /// assert_eq!(Variant::RFC4122, uuid.get_variant());
59    /// # Ok(())
60    /// # }
61    /// ```
62    ///
63    /// [`try_parse`]: #method.try_parse
64    pub fn parse_str(input: &str) -> Result<Uuid, Error> {
65        try_parse(input.as_bytes())
66            .map(Uuid::from_bytes)
67            .map_err(InvalidUuid::into_err)
68    }
69
70    /// Parses a `Uuid` from a string of hexadecimal digits with optional
71    /// hyphens.
72    ///
73    /// This function is similar to [`parse_str`], in fact `parse_str` shares
74    /// the same underlying parser. The difference is that if `try_parse`
75    /// fails, it won't generate very useful error messages. The `parse_str`
76    /// function will eventually be deprecated in favor or `try_parse`.
77    ///
78    /// To parse a UUID from a byte stream instead of a UTF8 string, see
79    /// [`try_parse_ascii`].
80    ///
81    /// # Examples
82    ///
83    /// Parse a hyphenated UUID:
84    ///
85    /// ```
86    /// # use uuid::{Uuid, Version, Variant};
87    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
88    /// let uuid = Uuid::try_parse("550e8400-e29b-41d4-a716-446655440000")?;
89    ///
90    /// assert_eq!(Some(Version::Random), uuid.get_version());
91    /// assert_eq!(Variant::RFC4122, uuid.get_variant());
92    /// # Ok(())
93    /// # }
94    /// ```
95    ///
96    /// [`parse_str`]: #method.parse_str
97    /// [`try_parse_ascii`]: #method.try_parse_ascii
98    pub const fn try_parse(input: &str) -> Result<Uuid, Error> {
99        Self::try_parse_ascii(input.as_bytes())
100    }
101
102    /// Parses a `Uuid` from a string of hexadecimal digits with optional
103    /// hyphens.
104    ///
105    /// The input is expected to be a string of ASCII characters. This method
106    /// can be more convenient than [`try_parse`] if the UUID is being
107    /// parsed from a byte stream instead of from a UTF8 string.
108    ///
109    /// # Examples
110    ///
111    /// Parse a hyphenated UUID:
112    ///
113    /// ```
114    /// # use uuid::{Uuid, Version, Variant};
115    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
116    /// let uuid = Uuid::try_parse_ascii(b"550e8400-e29b-41d4-a716-446655440000")?;
117    ///
118    /// assert_eq!(Some(Version::Random), uuid.get_version());
119    /// assert_eq!(Variant::RFC4122, uuid.get_variant());
120    /// # Ok(())
121    /// # }
122    /// ```
123    ///
124    /// [`try_parse`]: #method.try_parse
125    pub const fn try_parse_ascii(input: &[u8]) -> Result<Uuid, Error> {
126        match try_parse(input) {
127            Ok(bytes) => Ok(Uuid::from_bytes(bytes)),
128            // If parsing fails then we don't know exactly what went wrong
129            // In this case, we just return a generic error
130            Err(_) => Err(Error(ErrorKind::Other)),
131        }
132    }
133}
134
135const fn try_parse(input: &[u8]) -> Result<[u8; 16], InvalidUuid> {
136    let result = match (input.len(), input) {
137        // Inputs of 32 bytes must be a non-hyphenated UUID
138        (32, s) => parse_simple(s),
139        // Hyphenated UUIDs may be wrapped in various ways:
140        // - `{UUID}` for braced UUIDs
141        // - `urn:uuid:UUID` for URNs
142        // - `UUID` for a regular hyphenated UUID
143        (36, s)
144        | (38, [b'{', s @ .., b'}'])
145        | (
146            45,
147            [b'u', b'r', b'n', b':', b'u', b'u', b'i', b'd', b':', s @ ..],
148        ) => parse_hyphenated(s),
149        // Any other shaped input is immediately invalid
150        _ => Err(()),
151    };
152
153    match result {
154        Ok(b) => Ok(b),
155        Err(()) => Err(InvalidUuid(input)),
156    }
157}
158
159#[inline]
160const fn parse_simple(s: &[u8]) -> Result<[u8; 16], ()> {
161    // This length check here removes all other bounds
162    // checks in this function
163    if s.len() != 32 {
164        return Err(());
165    }
166
167    let mut buf: [u8; 16] = [0; 16];
168    let mut i = 0;
169
170    while i < 16 {
171        // Convert a two-char hex value (like `A8`)
172        // into a byte (like `10101000`)
173        let h1 = HEX_TABLE[s[i * 2] as usize];
174        let h2 = HEX_TABLE[s[i * 2 + 1] as usize];
175
176        // We use `0xff` as a sentinel value to indicate
177        // an invalid hex character sequence (like the letter `G`)
178        if h1 | h2 == 0xff {
179            return Err(());
180        }
181
182        // The upper nibble needs to be shifted into position
183        // to produce the final byte value
184        buf[i] = SHL4_TABLE[h1 as usize] | h2;
185        i += 1;
186    }
187
188    Ok(buf)
189}
190
191#[inline]
192const fn parse_hyphenated(s: &[u8]) -> Result<[u8; 16], ()> {
193    // This length check here removes all other bounds
194    // checks in this function
195    if s.len() != 36 {
196        return Err(());
197    }
198
199    // We look at two hex-encoded values (4 chars) at a time because
200    // that's the size of the smallest group in a hyphenated UUID.
201    // The indexes we're interested in are:
202    //
203    // uuid     : 936da01f-9abd-4d9d-80c7-02af85c822a8
204    //            |   |   ||   ||   ||   ||   |   |
205    // hyphens  : |   |   8|  13|  18|  23|   |   |
206    // positions: 0   4    9   14   19   24  28  32
207
208    // First, ensure the hyphens appear in the right places
209    match [s[8], s[13], s[18], s[23]] {
210        [b'-', b'-', b'-', b'-'] => {}
211        _ => return Err(()),
212    }
213
214    let positions: [u8; 8] = [0, 4, 9, 14, 19, 24, 28, 32];
215    let mut buf: [u8; 16] = [0; 16];
216    let mut j = 0;
217
218    while j < 8 {
219        let i = positions[j];
220
221        // The decoding here is the same as the simple case
222        // We're just dealing with two values instead of one
223        let h1 = HEX_TABLE[s[i as usize] as usize];
224        let h2 = HEX_TABLE[s[(i + 1) as usize] as usize];
225        let h3 = HEX_TABLE[s[(i + 2) as usize] as usize];
226        let h4 = HEX_TABLE[s[(i + 3) as usize] as usize];
227
228        if h1 | h2 | h3 | h4 == 0xff {
229            return Err(());
230        }
231
232        buf[j * 2] = SHL4_TABLE[h1 as usize] | h2;
233        buf[j * 2 + 1] = SHL4_TABLE[h3 as usize] | h4;
234        j += 1;
235    }
236
237    Ok(buf)
238}
239
/// Lookup table from a byte to the value of the hex digit it spells.
///
/// `b'0'..=b'9'` map to `0..=9`; `b'a'..=b'f'` and `b'A'..=b'F'` both map
/// to `10..=15`. Every other byte maps to the sentinel `0xff`.
const HEX_TABLE: &[u8; 256] = &{
    // Start with every byte marked invalid, then fill in the digits
    let mut table = [0xffu8; 256];

    let mut digit: u8 = 0;
    while digit < 10 {
        table[(b'0' + digit) as usize] = digit;
        digit += 1;
    }

    let mut letter: u8 = 0;
    while letter < 6 {
        // Both cases of a hex letter decode to the same value
        table[(b'a' + letter) as usize] = 10 + letter;
        table[(b'A' + letter) as usize] = 10 + letter;
        letter += 1;
    }

    table
};
259
/// Lookup table from a byte to that byte shifted left by four bits.
///
/// Entry `i` holds `i << 4` truncated to eight bits, so a decoded hex
/// digit (`0..=15`) maps to the same digit in the upper nibble.
const SHL4_TABLE: &[u8; 256] = &{
    let mut table = [0u8; 256];
    let mut idx: usize = 0;

    while idx < 256 {
        // Bits shifted past the top of the byte are discarded
        table[idx] = (idx as u8) << 4;
        idx += 1;
    }

    table
};
274
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{std::string::ToString, tests::new};

    /// Asserts that `Uuid::parse_str(input)` fails with exactly `kind`.
    fn assert_parse_err(input: &str, kind: ErrorKind) {
        assert_eq!(Uuid::parse_str(input), Err(Error(kind)));
    }

    /// Asserts that parsing `rendered` yields a value equal to `original`.
    fn assert_parses_back(original: &Uuid, rendered: &str) {
        let reparsed = Uuid::parse_str(rendered).unwrap();
        assert_eq!(original, &reparsed);
    }

    #[test]
    fn test_parse_uuid_v4_valid() {
        // The same UUID written in each supported format must decode to
        // the same value
        let from_hyphenated =
            Uuid::parse_str("67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap();
        let from_simple =
            Uuid::parse_str("67e5504410b1426f9247bb680e5fe0c8").unwrap();
        let from_urn =
            Uuid::parse_str("urn:uuid:67e55044-10b1-426f-9247-bb680e5fe0c8")
                .unwrap();
        let from_guid =
            Uuid::parse_str("{67e55044-10b1-426f-9247-bb680e5fe0c8}").unwrap();

        assert_eq!(from_hyphenated, from_simple);
        assert_eq!(from_hyphenated, from_urn);
        assert_eq!(from_hyphenated, from_guid);

        // A spread of well-formed inputs, including mixed-case hex
        let accepted = [
            "00000000000000000000000000000000",
            "67e55044-10b1-426f-9247-bb680e5fe0c8",
            "F9168C5E-CEB2-4faa-B6BF-329BF39FA1E4",
            "67e5504410b1426f9247bb680e5fe0c8",
            "01020304-1112-2122-3132-414243444546",
            "urn:uuid:67e55044-10b1-426f-9247-bb680e5fe0c8",
            "{6d93bade-bd9f-4e13-8914-9474e1e3567b}",
        ];
        for &text in accepted.iter() {
            assert!(Uuid::parse_str(text).is_ok());
        }

        // Nil
        let nil = Uuid::nil();
        assert_eq!(
            Uuid::parse_str("00000000000000000000000000000000").unwrap(),
            nil
        );
        assert_eq!(
            Uuid::parse_str("00000000-0000-0000-0000-000000000000").unwrap(),
            nil
        );
    }

    #[test]
    fn test_parse_uuid_v4_invalid() {
        // Each malformed input must produce one specific error
        assert_parse_err("", ErrorKind::SimpleLength { len: 0 });

        assert_parse_err(
            "!",
            ErrorKind::Char {
                character: '!',
                index: 1,
            },
        );

        assert_parse_err(
            "F9168C5E-CEB2-4faa-B6BF-329BF39FA1E45",
            ErrorKind::GroupLength {
                group: 4,
                len: 13,
                index: 25,
            },
        );

        assert_parse_err(
            "F9168C5E-CEB2-4faa-BBF-329BF39FA1E4",
            ErrorKind::GroupLength {
                group: 3,
                len: 3,
                index: 20,
            },
        );

        assert_parse_err(
            "F9168C5E-CEB2-4faa-BGBF-329BF39FA1E4",
            ErrorKind::Char {
                character: 'G',
                index: 21,
            },
        );

        assert_parse_err(
            "F9168C5E-CEB2F4faaFB6BFF329BF39FA1E4",
            ErrorKind::GroupCount { count: 2 },
        );

        assert_parse_err(
            "F9168C5E-CEB2-4faaFB6BFF329BF39FA1E4",
            ErrorKind::GroupCount { count: 3 },
        );

        assert_parse_err(
            "F9168C5E-CEB2-4faa-B6BFF329BF39FA1E4",
            ErrorKind::GroupCount { count: 4 },
        );

        assert_parse_err(
            "F9168C5E-CEB2-4faa",
            ErrorKind::GroupCount { count: 3 },
        );

        assert_parse_err(
            "F9168C5E-CEB2-4faaXB6BFF329BF39FA1E4",
            ErrorKind::Char {
                character: 'X',
                index: 19,
            },
        );

        assert_parse_err(
            "{F9168C5E-CEB2-4faa9B6BFF329BF39FA1E41",
            ErrorKind::Char {
                character: '{',
                index: 1,
            },
        );

        assert_parse_err(
            "{F9168C5E-CEB2-4faa9B6BFF329BF39FA1E41}",
            ErrorKind::GroupCount { count: 3 },
        );

        assert_parse_err(
            "F9168C5E-CEB-24fa-eB6BFF32-BF39FA1E4",
            ErrorKind::GroupLength {
                group: 1,
                len: 3,
                index: 10,
            },
        );

        assert_parse_err(
            "01020304-1112-2122-3132-41424344",
            ErrorKind::GroupLength {
                group: 4,
                len: 8,
                index: 25,
            },
        );

        assert_parse_err(
            "67e5504410b1426f9247bb680e5fe0c",
            ErrorKind::SimpleLength { len: 31 },
        );

        assert_parse_err(
            "67e5504410b1426f9247bb680e5fe0c88",
            ErrorKind::SimpleLength { len: 33 },
        );

        assert_parse_err(
            "67e5504410b1426f9247bb680e5fe0cg8",
            ErrorKind::Char {
                character: 'g',
                index: 32,
            },
        );

        assert_parse_err(
            "67e5504410b1426%9247bb680e5fe0c8",
            ErrorKind::Char {
                character: '%',
                index: 16,
            },
        );

        assert_parse_err(
            "231231212212423424324323477343246663",
            ErrorKind::SimpleLength { len: 36 },
        );

        assert_parse_err(
            "{00000000000000000000000000000000}",
            ErrorKind::GroupCount { count: 1 },
        );

        assert_parse_err(
            "67e5504410b1426f9247bb680e5fe0c",
            ErrorKind::SimpleLength { len: 31 },
        );

        assert_parse_err(
            "67e550X410b1426f9247bb680e5fe0cd",
            ErrorKind::Char {
                character: 'X',
                index: 7,
            },
        );

        assert_parse_err(
            "67e550-4105b1426f9247bb680e5fe0c",
            ErrorKind::GroupCount { count: 2 },
        );

        assert_parse_err(
            "F9168C5E-CEB2-4faa-B6BF1-02BF39FA1E4",
            ErrorKind::GroupLength {
                group: 3,
                len: 5,
                index: 20,
            },
        );
    }

    #[test]
    fn test_roundtrip_default() {
        let uuid_orig = new();
        let rendered = uuid_orig.to_string();
        assert_parses_back(&uuid_orig, &rendered);
    }

    #[test]
    fn test_roundtrip_hyphenated() {
        let uuid_orig = new();
        let rendered = uuid_orig.hyphenated().to_string();
        assert_parses_back(&uuid_orig, &rendered);
    }

    #[test]
    fn test_roundtrip_simple() {
        let uuid_orig = new();
        let rendered = uuid_orig.simple().to_string();
        assert_parses_back(&uuid_orig, &rendered);
    }

    #[test]
    fn test_roundtrip_urn() {
        let uuid_orig = new();
        let rendered = uuid_orig.urn().to_string();
        assert_parses_back(&uuid_orig, &rendered);
    }

    #[test]
    fn test_roundtrip_braced() {
        let uuid_orig = new();
        let rendered = uuid_orig.braced().to_string();
        assert_parses_back(&uuid_orig, &rendered);
    }

    #[test]
    fn test_try_parse_ascii_non_utf8() {
        // A NUL byte sits where a hex digit is expected
        let parsed =
            Uuid::try_parse_ascii(b"67e55044-10b1-426f-9247-bb680e5\0e0c8");
        assert!(parsed.is_err());
    }
}