url/
host.rs

1// Copyright 2013-2016 The rust-url developers.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9use std::cmp;
10use std::fmt::{self, Formatter};
11use std::net::{Ipv4Addr, Ipv6Addr};
12
13use percent_encoding::{percent_decode, utf8_percent_encode, CONTROLS};
14#[cfg(feature = "serde")]
15use serde::{Deserialize, Serialize};
16
17use crate::parser::{ParseError, ParseResult};
18
/// Compact, `Copy` summary of a [`Host`] that records which kind of host is
/// present without storing the domain text.
///
/// The `From<Host<String>>` conversion in this file maps an empty domain to
/// [`HostInternal::None`], any non-empty domain to [`HostInternal::Domain`],
/// and carries IP addresses over unchanged.
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub(crate) enum HostInternal {
    /// No host; this is what an empty domain string converts to.
    None,
    /// A non-empty domain. The name itself is not kept in this type.
    Domain,
    /// An IPv4 address.
    Ipv4(Ipv4Addr),
    /// An IPv6 address.
    Ipv6(Ipv6Addr),
}
27
28impl From<Host<String>> for HostInternal {
29    fn from(host: Host<String>) -> HostInternal {
30        match host {
31            Host::Domain(ref s) if s.is_empty() => HostInternal::None,
32            Host::Domain(_) => HostInternal::Domain,
33            Host::Ipv4(address) => HostInternal::Ipv4(address),
34            Host::Ipv6(address) => HostInternal::Ipv6(address),
35        }
36    }
37}
38
/// The host name of a URL.
///
/// The payload type `S` of [`Host::Domain`] defaults to `String`.
/// `PartialEq` is deliberately not derived: this file implements it by hand
/// so that a `Host<S>` can be compared with a `Host<T>` whenever
/// `S: PartialEq<T>`.
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[derive(Clone, Debug, Eq, Ord, PartialOrd, Hash)]
pub enum Host<S = String> {
    /// A DNS domain name, as '.' dot-separated labels.
    /// Non-ASCII labels are encoded in punycode per IDNA if this is the host of
    /// a special URL, or percent encoded for non-special URLs. Hosts for
    /// non-special URLs are also called opaque hosts.
    Domain(S),

    /// An IPv4 address.
    /// `Url::host_str` returns the serialization of this address,
    /// as four decimal integers separated by `.` dots.
    Ipv4(Ipv4Addr),

    /// An IPv6 address.
    /// `Url::host_str` returns the serialization of that address between `[` and `]` brackets,
    /// in the format per [RFC 5952 *A Recommendation
    /// for IPv6 Address Text Representation*](https://tools.ietf.org/html/rfc5952):
    /// lowercase hexadecimal with maximal `::` compression.
    Ipv6(Ipv6Addr),
}
61
62impl<'a> Host<&'a str> {
63    /// Return a copy of `self` that owns an allocated `String` but does not borrow an `&Url`.
64    pub fn to_owned(&self) -> Host<String> {
65        match *self {
66            Host::Domain(domain) => Host::Domain(domain.to_owned()),
67            Host::Ipv4(address) => Host::Ipv4(address),
68            Host::Ipv6(address) => Host::Ipv6(address),
69        }
70    }
71}
72
73impl Host<String> {
74    /// Parse a host: either an IPv6 address in [] square brackets, or a domain.
75    ///
76    /// <https://url.spec.whatwg.org/#host-parsing>
77    pub fn parse(input: &str) -> Result<Self, ParseError> {
78        if input.starts_with('[') {
79            if !input.ends_with(']') {
80                return Err(ParseError::InvalidIpv6Address);
81            }
82            return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6);
83        }
84        let domain = percent_decode(input.as_bytes()).decode_utf8_lossy();
85
86        let domain = Self::domain_to_ascii(&domain)?;
87
88        if domain.is_empty() {
89            return Err(ParseError::EmptyHost);
90        }
91
92        let is_invalid_domain_char = |c| {
93            matches!(
94                c,
95                '\0'..='\u{001F}'
96                    | ' '
97                    | '#'
98                    | '%'
99                    | '/'
100                    | ':'
101                    | '<'
102                    | '>'
103                    | '?'
104                    | '@'
105                    | '['
106                    | '\\'
107                    | ']'
108                    | '^'
109                    | '\u{007F}'
110                    | '|'
111            )
112        };
113
114        if domain.find(is_invalid_domain_char).is_some() {
115            Err(ParseError::InvalidDomainCharacter)
116        } else if ends_in_a_number(&domain) {
117            let address = parse_ipv4addr(&domain)?;
118            Ok(Host::Ipv4(address))
119        } else {
120            Ok(Host::Domain(domain))
121        }
122    }
123
124    // <https://url.spec.whatwg.org/#concept-opaque-host-parser>
125    pub fn parse_opaque(input: &str) -> Result<Self, ParseError> {
126        if input.starts_with('[') {
127            if !input.ends_with(']') {
128                return Err(ParseError::InvalidIpv6Address);
129            }
130            return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6);
131        }
132
133        let is_invalid_host_char = |c| {
134            matches!(
135                c,
136                '\0' | '\t'
137                    | '\n'
138                    | '\r'
139                    | ' '
140                    | '#'
141                    | '/'
142                    | ':'
143                    | '<'
144                    | '>'
145                    | '?'
146                    | '@'
147                    | '['
148                    | '\\'
149                    | ']'
150                    | '^'
151                    | '|'
152            )
153        };
154
155        if input.find(is_invalid_host_char).is_some() {
156            Err(ParseError::InvalidDomainCharacter)
157        } else {
158            Ok(Host::Domain(
159                utf8_percent_encode(input, CONTROLS).to_string(),
160            ))
161        }
162    }
163
164    /// convert domain with idna
165    fn domain_to_ascii(domain: &str) -> Result<String, ParseError> {
166        idna::domain_to_ascii(domain).map_err(Into::into)
167    }
168}
169
170impl<S: AsRef<str>> fmt::Display for Host<S> {
171    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
172        match *self {
173            Host::Domain(ref domain) => domain.as_ref().fmt(f),
174            Host::Ipv4(ref addr) => addr.fmt(f),
175            Host::Ipv6(ref addr) => {
176                f.write_str("[")?;
177                write_ipv6(addr, f)?;
178                f.write_str("]")
179            }
180        }
181    }
182}
183
184impl<S, T> PartialEq<Host<T>> for Host<S>
185where
186    S: PartialEq<T>,
187{
188    fn eq(&self, other: &Host<T>) -> bool {
189        match (self, other) {
190            (Host::Domain(a), Host::Domain(b)) => a == b,
191            (Host::Ipv4(a), Host::Ipv4(b)) => a == b,
192            (Host::Ipv6(a), Host::Ipv6(b)) => a == b,
193            (_, _) => false,
194        }
195    }
196}
197
198fn write_ipv6(addr: &Ipv6Addr, f: &mut Formatter<'_>) -> fmt::Result {
199    let segments = addr.segments();
200    let (compress_start, compress_end) = longest_zero_sequence(&segments);
201    let mut i = 0;
202    while i < 8 {
203        if i == compress_start {
204            f.write_str(":")?;
205            if i == 0 {
206                f.write_str(":")?;
207            }
208            if compress_end < 8 {
209                i = compress_end;
210            } else {
211                break;
212            }
213        }
214        write!(f, "{:x}", segments[i as usize])?;
215        if i < 7 {
216            f.write_str(":")?;
217        }
218        i += 1;
219    }
220    Ok(())
221}
222
// https://url.spec.whatwg.org/#concept-ipv6-serializer step 2 and 3
//
// Locate the first longest run of zero pieces. Returns `(start, end)` with
// `end` exclusive when that run has at least two pieces, and the sentinel
// `(-1, -2)` when there is no such run (lone zeroes are ignored).
fn longest_zero_sequence(pieces: &[u16; 8]) -> (isize, isize) {
    // Best run so far as (start, end), end exclusive.
    let mut best: Option<(usize, usize)> = None;
    let mut idx = 0;
    while idx < pieces.len() {
        if pieces[idx] != 0 {
            idx += 1;
            continue;
        }
        let run_start = idx;
        while idx < pieces.len() && pieces[idx] == 0 {
            idx += 1;
        }
        // Strictly longer only, so the earliest of equally long runs wins.
        let is_longer = match best {
            Some((start, end)) => idx - run_start > end - start,
            None => true,
        };
        if is_longer {
            best = Some((run_start, idx));
        }
    }
    match best {
        Some((start, end)) if end - start >= 2 => (start as isize, end as isize),
        _ => (-1, -2),
    }
}
258
259/// <https://url.spec.whatwg.org/#ends-in-a-number-checker>
260fn ends_in_a_number(input: &str) -> bool {
261    let mut parts = input.rsplit('.');
262    let last = parts.next().unwrap();
263    let last = if last.is_empty() {
264        if let Some(last) = parts.next() {
265            last
266        } else {
267            return false;
268        }
269    } else {
270        last
271    };
272    if !last.is_empty() && last.chars().all(|c| ('0'..='9').contains(&c)) {
273        return true;
274    }
275
276    parse_ipv4number(last).is_ok()
277}
278
/// Parse one dot-separated part of an IPv4 address as a number.
///
/// A `0x`/`0X` prefix selects hexadecimal; otherwise a leading `0` on an
/// input of at least two bytes selects octal; anything else is decimal.
/// When nothing remains after the prefix (e.g. `0x`), the value is zero.
///
/// Returns `Err(())` for an empty input or a character that is not a digit
/// in the selected radix, `Ok(Some(n))` for a value that fits in a `u32`,
/// and `Ok(None)` when the input is a valid number that overflows a `u32`.
///
/// <https://url.spec.whatwg.org/#ipv4-number-parser>
fn parse_ipv4number(mut input: &str) -> Result<Option<u32>, ()> {
    if input.is_empty() {
        return Err(());
    }

    let radix = if input.starts_with("0x") || input.starts_with("0X") {
        input = &input[2..];
        16
    } else if input.len() >= 2 && input.starts_with('0') {
        input = &input[1..];
        8
    } else {
        10
    };

    if input.is_empty() {
        return Ok(Some(0));
    }
    if !input.chars().all(|c| c.is_digit(radix)) {
        return Err(());
    }

    // Every character was validated above, so the only way the conversion
    // can fail is by overflowing `u32`; that is reported as `None`.
    Ok(u32::from_str_radix(input, radix).ok())
}
317
318/// <https://url.spec.whatwg.org/#concept-ipv4-parser>
319fn parse_ipv4addr(input: &str) -> ParseResult<Ipv4Addr> {
320    let mut parts: Vec<&str> = input.split('.').collect();
321    if parts.last() == Some(&"") {
322        parts.pop();
323    }
324    if parts.len() > 4 {
325        return Err(ParseError::InvalidIpv4Address);
326    }
327    let mut numbers: Vec<u32> = Vec::new();
328    for part in parts {
329        match parse_ipv4number(part) {
330            Ok(Some(n)) => numbers.push(n),
331            Ok(None) => return Err(ParseError::InvalidIpv4Address), // u32 overflow
332            Err(()) => return Err(ParseError::InvalidIpv4Address),
333        };
334    }
335    let mut ipv4 = numbers.pop().expect("a non-empty list of numbers");
336    // Equivalent to: ipv4 >= 256 ** (4 − numbers.len())
337    if ipv4 > u32::max_value() >> (8 * numbers.len() as u32) {
338        return Err(ParseError::InvalidIpv4Address);
339    }
340    if numbers.iter().any(|x| *x > 255) {
341        return Err(ParseError::InvalidIpv4Address);
342    }
343    for (counter, n) in numbers.iter().enumerate() {
344        ipv4 += n << (8 * (3 - counter as u32))
345    }
346    Ok(Ipv4Addr::from(ipv4))
347}
348
/// Parse the textual form of an IPv6 address, without the surrounding `[` `]`.
///
/// Accepts `:`-separated groups of one to four hexadecimal digits, at most
/// one `::` compression, and an optional trailing dotted-decimal IPv4 part
/// that occupies two 16-bit pieces. Inputs shorter than two bytes and any
/// other violation yield `ParseError::InvalidIpv6Address`.
///
/// <https://url.spec.whatwg.org/#concept-ipv6-parser>
fn parse_ipv6addr(input: &str) -> ParseResult<Ipv6Addr> {
    let input = input.as_bytes();
    let len = input.len();
    // Set once a '.' is found right after a group, i.e. an embedded IPv4 part.
    let mut is_ip_v4 = false;
    // The eight 16-bit pieces of the address being built.
    let mut pieces = [0, 0, 0, 0, 0, 0, 0, 0];
    // Index of the next piece to fill.
    let mut piece_pointer = 0;
    // Piece index recorded when "::" was seen, if it was.
    let mut compress_pointer = None;
    // Read position in `input`.
    let mut i = 0;

    if len < 2 {
        return Err(ParseError::InvalidIpv6Address);
    }

    // A leading ':' is only valid as the first half of "::".
    if input[0] == b':' {
        if input[1] != b':' {
            return Err(ParseError::InvalidIpv6Address);
        }
        i = 2;
        piece_pointer = 1;
        compress_pointer = Some(1);
    }

    while i < len {
        // More input although all eight pieces are already used.
        if piece_pointer == 8 {
            return Err(ParseError::InvalidIpv6Address);
        }
        // A ':' where a group should start is the second colon of "::".
        // Only one compression is allowed per address.
        if input[i] == b':' {
            if compress_pointer.is_some() {
                return Err(ParseError::InvalidIpv6Address);
            }
            i += 1;
            piece_pointer += 1;
            compress_pointer = Some(piece_pointer);
            continue;
        }
        // Read up to four hexadecimal digits into `value`.
        let start = i;
        let end = cmp::min(len, start + 4);
        let mut value = 0u16;
        while i < end {
            match (input[i] as char).to_digit(16) {
                Some(digit) => {
                    value = value * 0x10 + digit as u16;
                    i += 1;
                }
                None => break,
            }
        }
        // Inspect the byte that follows the group, if any.
        if i < len {
            match input[i] {
                b'.' => {
                    // The digits just read belong to an IPv4 part: require at
                    // least one digit, rewind so they are re-read as decimal,
                    // and make sure two pieces are still free.
                    if i == start {
                        return Err(ParseError::InvalidIpv6Address);
                    }
                    i = start;
                    if piece_pointer > 6 {
                        return Err(ParseError::InvalidIpv6Address);
                    }
                    is_ip_v4 = true;
                }
                b':' => {
                    // Group separator; it must not be the last byte.
                    i += 1;
                    if i == len {
                        return Err(ParseError::InvalidIpv6Address);
                    }
                }
                _ => return Err(ParseError::InvalidIpv6Address),
            }
        }
        // The IPv4 part is handled below; `value` is discarded in that case.
        if is_ip_v4 {
            break;
        }
        pieces[piece_pointer] = value;
        piece_pointer += 1;
    }

    if is_ip_v4 {
        if piece_pointer > 6 {
            return Err(ParseError::InvalidIpv6Address);
        }
        // Count of dotted-decimal numbers parsed so far; exactly four are required.
        let mut numbers_seen = 0;
        while i < len {
            // Every number after the first must be preceded by a '.',
            // and no fifth number is allowed.
            if numbers_seen > 0 {
                if numbers_seen < 4 && (i < len && input[i] == b'.') {
                    i += 1
                } else {
                    return Err(ParseError::InvalidIpv6Address);
                }
            }

            // Accumulate one decimal number in the range 0..=255.
            let mut ipv4_piece = None;
            while i < len {
                let digit = match input[i] {
                    c @ b'0'..=b'9' => c - b'0',
                    _ => break,
                };
                match ipv4_piece {
                    None => ipv4_piece = Some(digit as u16),
                    Some(0) => return Err(ParseError::InvalidIpv6Address), // No leading zero
                    Some(ref mut v) => {
                        *v = *v * 10 + digit as u16;
                        if *v > 255 {
                            return Err(ParseError::InvalidIpv6Address);
                        }
                    }
                }
                i += 1;
            }

            // Shift the number into the current piece; a missing number
            // (no digit read) is an error.
            pieces[piece_pointer] = if let Some(v) = ipv4_piece {
                pieces[piece_pointer] * 0x100 + v
            } else {
                return Err(ParseError::InvalidIpv6Address);
            };
            numbers_seen += 1;

            // Two numbers make up one 16-bit piece.
            if numbers_seen == 2 || numbers_seen == 4 {
                piece_pointer += 1;
            }
        }

        if numbers_seen != 4 {
            return Err(ParseError::InvalidIpv6Address);
        }
    }

    if i < len {
        return Err(ParseError::InvalidIpv6Address);
    }

    match compress_pointer {
        Some(compress_pointer) => {
            // Move the pieces parsed after "::" to the end of the array,
            // working backwards from the last slot.
            let mut swaps = piece_pointer - compress_pointer;
            piece_pointer = 7;
            while swaps > 0 {
                pieces.swap(piece_pointer, compress_pointer + swaps - 1);
                swaps -= 1;
                piece_pointer -= 1;
            }
        }
        _ => {
            // Without "::" all eight pieces must have been given explicitly.
            if piece_pointer != 8 {
                return Err(ParseError::InvalidIpv6Address);
            }
        }
    }
    Ok(Ipv6Addr::new(
        pieces[0], pieces[1], pieces[2], pieces[3], pieces[4], pieces[5], pieces[6], pieces[7],
    ))
}