trust_dns_proto/rr/domain/
label.rs

1// Copyright 2015-2018 Benjamin Fry <benjaminfry@me.com>
2//
3// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
4// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5// http://opensource.org/licenses/MIT>, at your option. This file may not be
6// copied, modified, or distributed except according to those terms.
7
8//! Labels are used as the internal components of a Name.
9//!
10//! A label is stored internally as ascii, where all unicode characters are converted to punycode internally.
11
12#[allow(clippy::useless_attribute)]
13#[allow(unused)]
14#[allow(deprecated)]
15use std::ascii::AsciiExt;
16use std::borrow::Borrow;
17use std::cmp::{Ordering, PartialEq};
18use std::fmt::{self, Debug, Display, Formatter, Write};
19use std::hash::{Hash, Hasher};
20use tinyvec::TinyVec;
21
22use idna;
23use tracing::debug;
24
25use crate::error::*;
26
/// Raw bytes of the wildcard label, `*`.
27const WILDCARD: &[u8] = b"*";
/// Prefix checked by the `Display` impl to decide whether a label should be decoded via IDNA.
28const IDNA_PREFIX: &[u8] = b"xn--";
29
30/// Labels are always stored as ASCII, unicode characters must be encoded with punycode
///
/// The `PartialEq`, `Ord` and `Hash` implementations in this module all ignore ASCII case,
/// so `"ABC"` and `"abc"` are the same label for comparison and hashing purposes even
/// though the stored bytes keep their original case.
31#[derive(Clone, Eq)]
32pub struct Label(TinyVec<[u8; 24]>);
33
34impl Label {
35    /// These must only be ASCII, with unicode encoded to PunyCode, or other such transformation.
36    ///
37    /// This uses the bytes as raw ascii values, with nothing escaped on the wire.
38    /// Generally users should use `from_str` or `from_ascii`
39    pub fn from_raw_bytes(bytes: &[u8]) -> ProtoResult<Self> {
40        // Check for label validity.
41        // RFC 2181, Section 11 "Name Syntax".
42        // > The length of any one label is limited to between 1 and 63 octets.
43        if bytes.is_empty() {
44            return Err("Label requires a minimum length of 1".into());
45        }
46        if bytes.len() > 63 {
47            return Err(format!("Label exceeds maximum length 63: {}", bytes.len()).into());
48        };
49        Ok(Self(TinyVec::from(bytes)))
50    }
51
52    /// Translates this string into IDNA safe name, encoding to punycode as necessary.
53    pub fn from_utf8(s: &str) -> ProtoResult<Self> {
54        if s.as_bytes() == WILDCARD {
55            return Ok(Self::wildcard());
56        }
57
58        // special case for SRV type records
59        if s.starts_with('_') {
60            return Self::from_ascii(s);
61        }
62
63        match idna::Config::default()
64            .use_std3_ascii_rules(true)
65            .transitional_processing(true)
66            .verify_dns_length(true)
67            .to_ascii(s)
68        {
69            Ok(puny) => Self::from_ascii(&puny),
70            e => Err(format!("Label contains invalid characters: {:?}", e).into()),
71        }
72    }
73
74    /// Takes the ascii string and returns a new label.
75    ///
76    /// This will return an Error if the label is not an ascii string
77    pub fn from_ascii(s: &str) -> ProtoResult<Self> {
78        if s.as_bytes() == WILDCARD {
79            return Ok(Self::wildcard());
80        }
81
82        if !s.is_empty()
83            && s.is_ascii()
84            && s.chars().take(1).all(|c| is_safe_ascii(c, true, false))
85            && s.chars().skip(1).all(|c| is_safe_ascii(c, false, false))
86        {
87            Self::from_raw_bytes(s.as_bytes())
88        } else {
89            Err(format!("Malformed label: {}", s).into())
90        }
91    }
92
93    /// Returns a new Label of the Wildcard, i.e. "*"
94    pub fn wildcard() -> Self {
95        Self(TinyVec::from(WILDCARD))
96    }
97
98    /// Converts this label to lowercase
99    pub fn to_lowercase(&self) -> Self {
100        // TODO: replace case conversion when (ascii_ctype #39658) stabilizes
101        if let Some((idx, _)) = self
102            .0
103            .iter()
104            .enumerate()
105            .find(|&(_, c)| *c != c.to_ascii_lowercase())
106        {
107            let mut lower_label: Vec<u8> = self.0.to_vec();
108            lower_label[idx..].make_ascii_lowercase();
109            Self(TinyVec::from(lower_label.as_slice()))
110        } else {
111            self.clone()
112        }
113    }
114
115    /// Returns true if this label is the wildcard, '*', label
116    pub fn is_wildcard(&self) -> bool {
117        self.as_bytes() == WILDCARD
118    }
119
120    /// Returns the lenght in bytes of this label
121    pub fn len(&self) -> usize {
122        self.0.len()
123    }
124
125    /// True if the label contains no characters
126    pub fn is_empty(&self) -> bool {
127        self.0.is_empty()
128    }
129
130    /// Returns the raw bytes of the label, this is good for writing to the wire.
131    ///
132    /// See [`Display`] for presentation version (unescaped from punycode, etc)
133    pub fn as_bytes(&self) -> &[u8] {
134        &self.0
135    }
136
137    /// Performs the equivalence operation disregarding case
138    pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
139        self.0.eq_ignore_ascii_case(&other.0)
140    }
141
142    /// compares with the other label, ignoring case
143    pub fn cmp_with_f<F: LabelCmp>(&self, other: &Self) -> Ordering {
144        let s = self.0.iter();
145        let o = other.0.iter();
146
147        for (s, o) in s.zip(o) {
148            match F::cmp_u8(*s, *o) {
149                Ordering::Equal => continue,
150                not_eq => return not_eq,
151            }
152        }
153
154        self.0.len().cmp(&other.0.len())
155    }
156
157    /// Performs the conversion to utf8 from IDNA as necessary, see `fmt` for more details
158    pub fn to_utf8(&self) -> String {
159        format!("{}", self)
160    }
161
162    /// Converts this label to safe ascii, escaping characters as necessary
163    ///
164    /// If this is an IDNA, punycode, label, then the xn-- prefix will be maintained as ascii
165    pub fn to_ascii(&self) -> String {
166        let mut ascii = String::with_capacity(self.as_bytes().len());
167
168        self.write_ascii(&mut ascii)
169            .expect("should never fail to write a new string");
170        ascii
171    }
172
173    /// Writes this label to safe ascii, escaping characters as necessary
174    pub fn write_ascii<W: Write>(&self, f: &mut W) -> Result<(), fmt::Error> {
175        // We can't guarantee that the same input will always translate to the same output
176        fn escape_non_ascii<W: Write>(
177            byte: u8,
178            f: &mut W,
179            is_first: bool,
180        ) -> Result<(), fmt::Error> {
181            let to_triple_escape = |ch: u8| format!("\\{:03o}", ch);
182            let to_single_escape = |ch: char| format!("\\{}", ch);
183
184            match char::from(byte) {
185                c if is_safe_ascii(c, is_first, true) => f.write_char(c)?,
186                // it's not a control and is printable as well as inside the standard ascii range
187                c if byte > b'\x20' && byte < b'\x7f' => f.write_str(&to_single_escape(c))?,
188                _ => f.write_str(&to_triple_escape(byte))?,
189            }
190
191            Ok(())
192        }
193
194        // traditional ascii case...
195        let mut chars = self.as_bytes().iter();
196        if let Some(ch) = chars.next() {
197            escape_non_ascii(*ch, f, true)?;
198        }
199
200        for ch in chars {
201            escape_non_ascii(*ch, f, false)?;
202        }
203
204        Ok(())
205    }
206}
207
208impl AsRef<[u8]> for Label {
209    fn as_ref(&self) -> &[u8] {
210        self.as_bytes()
211    }
212}
213
214impl Borrow<[u8]> for Label {
215    fn borrow(&self) -> &[u8] {
216        &self.0
217    }
218}
219
/// Decides whether `c` may appear unescaped in a label.
///
/// * `is_first` - `c` is the first character of the label; a leading `-` is refused and `*`
///   (wildcard) is only accepted in this position.
/// * `for_encoding` - the check is made while writing the label out; `.` is then refused so
///   that the caller escapes it, whereas parsing accepts it.
///
/// ASCII letters, ASCII digits and `_` are always accepted; anything non-ASCII never is.
fn is_safe_ascii(c: char, is_first: bool, for_encoding: bool) -> bool {
    if !c.is_ascii() {
        return false;
    }

    // letters, digits and the underscore (SRV like labels) are fine in any position
    if c.is_ascii_alphanumeric() || c == '_' {
        return true;
    }

    match c {
        '-' => !is_first,     // dash is allowed, but never leading
        '*' => is_first,      // wildcard
        '.' => !for_encoding, // needed to allow dots, for things like email addresses
        _ => false,
    }
}
231
232impl Display for Label {
233    /// outputs characters in a safe string manner.
234    ///
235    /// if the string is punycode, i.e. starts with `xn--`, otherwise it translates to a safe ascii string
236    ///   escaping characters as necessary.
237    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), fmt::Error> {
238        if self.as_bytes().starts_with(IDNA_PREFIX) {
239            // this should never be outside the ascii codes...
240            let label = String::from_utf8_lossy(self.borrow());
241            let (label, e) = idna::Config::default()
242                .use_std3_ascii_rules(false)
243                .transitional_processing(false)
244                .verify_dns_length(false)
245                .to_unicode(&label);
246
247            if e.is_ok() {
248                return f.write_str(&label);
249            } else {
250                debug!(
251                    "xn-- prefixed string did not translate via IDNA properly: {:?}",
252                    e
253                )
254            }
255        }
256
257        // it wasn't known to be utf8
258        self.write_ascii(f)
259    }
260}
261
262impl Debug for Label {
263    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), fmt::Error> {
264        let label = String::from_utf8_lossy(self.borrow());
265        f.write_str(&label)
266    }
267}
268
269impl PartialEq<Self> for Label {
270    fn eq(&self, other: &Self) -> bool {
271        self.eq_ignore_ascii_case(other)
272    }
273}
274
275impl PartialOrd<Self> for Label {
276    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
277        Some(self.cmp(other))
278    }
279}
280
281impl Ord for Label {
282    fn cmp(&self, other: &Self) -> Ordering {
283        self.cmp_with_f::<CaseInsensitive>(other)
284    }
285}
286
287impl Hash for Label {
288    fn hash<H>(&self, state: &mut H)
289    where
290        H: Hasher,
291    {
292        for b in self.borrow() as &[u8] {
293            state.write_u8(b.to_ascii_lowercase());
294        }
295    }
296}
297
298/// Label comparison trait for case sensitive or insensitive comparisons
///
/// An implementor only decides how two individual bytes are ordered; `Label::cmp_with_f`
/// applies that ordering position by position.
299pub trait LabelCmp {
300    /// this should mimic the cmp method from [`PartialOrd`]
    ///
    /// The result is a plain [`Ordering`] rather than an `Option`, i.e. it has the shape of
    /// [`Ord::cmp`]: `l` and `r` are always comparable.
301    fn cmp_u8(l: u8, r: u8) -> Ordering;
302}
303
304/// For case sensitive comparisons
305pub(super) struct CaseSensitive;
306
307impl LabelCmp for CaseSensitive {
308    fn cmp_u8(l: u8, r: u8) -> Ordering {
309        l.cmp(&r)
310    }
311}
312
313/// For case insensitive comparisons
314pub(super) struct CaseInsensitive;
315
316impl LabelCmp for CaseInsensitive {
317    fn cmp_u8(l: u8, r: u8) -> Ordering {
318        l.to_ascii_lowercase().cmp(&r.to_ascii_lowercase())
319    }
320}
321
322/// Conversion into a Label
///
/// Implemented in this module for `Label`, `&Label`, `&str`, `String`, `&[u8]` and `Vec<u8>`.
/// The string forms go through `Label::from_utf8`, the byte forms through
/// `Label::from_raw_bytes`.
323pub trait IntoLabel: Sized {
324    /// Convert this into Label
    ///
    /// Returns an error when the value cannot be turned into a valid label.
325    fn into_label(self) -> ProtoResult<Label>;
326}
327
328impl<'a> IntoLabel for &'a Label {
329    fn into_label(self) -> ProtoResult<Label> {
330        Ok(self.clone())
331    }
332}
333
/// A `Label` converts into itself; this never fails.
334impl IntoLabel for Label {
335    fn into_label(self) -> ProtoResult<Label> {
        // already a label: hand it back unchanged
336        Ok(self)
337    }
338}
339
340impl<'a> IntoLabel for &'a str {
341    fn into_label(self) -> ProtoResult<Label> {
342        Label::from_utf8(self)
343    }
344}
345
346impl IntoLabel for String {
347    fn into_label(self) -> ProtoResult<Label> {
348        Label::from_utf8(&self)
349    }
350}
351
352impl<'a> IntoLabel for &'a [u8] {
353    fn into_label(self) -> ProtoResult<Label> {
354        Label::from_raw_bytes(self)
355    }
356}
357
358impl IntoLabel for Vec<u8> {
359    fn into_label(self) -> ProtoResult<Label> {
360        Label::from_raw_bytes(&self)
361    }
362}
363
#[cfg(test)]
mod tests {
    #![allow(clippy::dbg_macro, clippy::print_stdout)]

    use super::*;

    /// Builds a label from raw bytes, panicking if they are rejected.
    fn raw(bytes: &[u8]) -> Label {
        Label::from_raw_bytes(bytes).unwrap()
    }

    /// Builds a label through `Label::from_ascii`, panicking if it is rejected.
    fn ascii(text: &str) -> Label {
        Label::from_ascii(text).unwrap()
    }

    /// Builds a label through `Label::from_utf8`, panicking if it is rejected.
    fn utf8(text: &str) -> Label {
        Label::from_utf8(text).unwrap()
    }

    #[test]
    fn test_encoding() {
        assert_eq!(utf8("abc"), raw(b"abc"));
        // case insensitive, this works...
        assert_eq!(utf8("ABC"), raw(b"ABC"));
        assert_eq!(utf8("🦀"), raw(b"xn--zs9h"));
        assert_eq!(utf8("rust-🦀-icon"), raw(b"xn--rust--icon-9447i"));
        assert_eq!(ascii("ben.fry"), raw(b"ben.fry"));
        assert_eq!(utf8("🦀").to_utf8(), "🦀");
        assert_eq!(utf8("🦀").to_ascii(), "xn--zs9h");
    }

    #[test]
    fn test_decoding() {
        assert_eq!(raw(b"abc").to_string(), "abc");
        assert_eq!(raw(b"xn--zs9h").to_string(), "🦀");
        assert_eq!(raw(b"xn--rust--icon-9447i").to_string(), "rust-🦀-icon");
    }

    #[test]
    fn test_to_lowercase() {
        let upper = ascii("ABC");
        let mixed = ascii("abcDEF");

        // the presentation form keeps the stored case...
        assert_ne!(upper.to_string(), "abc");
        assert_ne!(mixed.to_string(), "abcdef");

        // ...until the label is explicitly lowercased
        assert_eq!(upper.to_lowercase().to_string(), "abc");
        assert_eq!(mixed.to_lowercase().to_string(), "abcdef");
    }

    #[test]
    fn test_to_cmp_f() {
        let upper = ascii("ABC");
        let lower = ascii("abc");
        let mixed = ascii("abcDEF");
        let mixed_lower = ascii("abcdef");

        assert_eq!(
            upper.cmp_with_f::<CaseInsensitive>(&lower),
            Ordering::Equal
        );
        assert_eq!(
            mixed.cmp_with_f::<CaseInsensitive>(&mixed_lower),
            Ordering::Equal
        );
        assert_eq!(upper.cmp_with_f::<CaseSensitive>(&lower), Ordering::Less);
        assert_eq!(
            mixed.cmp_with_f::<CaseSensitive>(&mixed_lower),
            Ordering::Less
        );
    }

    #[test]
    fn test_partial_cmp() {
        // every pair is (smaller, larger)
        let ordered_pairs = [
            (raw(b"yljkjljk"), raw(b"Z")),
            (raw(b"Z"), raw(b"zABC")),
            (raw(&[1]), raw(b"*")),
            (raw(b"*"), raw(&[200])),
        ];

        for (left, right) in ordered_pairs.iter() {
            println!("left: {}, right: {}", left, right);
            assert_eq!(left.cmp(right), Ordering::Less);
        }
    }

    #[test]
    fn test_is_wildcard() {
        assert!(raw(b"*").is_wildcard());
        assert!(ascii("*").is_wildcard());
        assert!(utf8("*").is_wildcard());
        assert!(!raw(b"abc").is_wildcard());
    }

    #[test]
    fn test_ascii_escape() {
        assert_eq!(raw(&[0o200]).to_string(), "\\200");
        assert_eq!(raw(&[0o001]).to_string(), "\\001");
        assert_eq!(ascii(".").to_ascii(), "\\.");
        assert_eq!(ascii("ben.fry").to_string(), "ben\\.fry");
        assert_eq!(raw(&[0o200]).to_ascii(), "\\200");
    }
}