url/
parser.rs

1// Copyright 2013-2016 The rust-url developers.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9use std::error::Error;
10use std::fmt::{self, Formatter, Write};
11use std::str;
12
13use crate::host::{Host, HostInternal};
14use crate::Url;
15use form_urlencoded::EncodingOverride;
16use percent_encoding::{percent_encode, utf8_percent_encode, AsciiSet, CONTROLS};
17
/// Bytes percent-encoded in URL fragments: `CONTROLS` plus space, `"`, `<`,
/// `>` and the backtick.
///
/// https://url.spec.whatwg.org/#fragment-percent-encode-set
const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');

/// Bytes percent-encoded in URL paths: everything in `FRAGMENT` plus `#`,
/// `?`, `{` and `}`.
///
/// https://url.spec.whatwg.org/#path-percent-encode-set
const PATH: &AsciiSet = &FRAGMENT.add(b'#').add(b'?').add(b'{').add(b'}');

/// Bytes percent-encoded in the username and password: everything in `PATH`
/// plus `/`, `:`, `;`, `=`, `@`, `[`, `\`, `]`, `^` and `|`.
///
/// https://url.spec.whatwg.org/#userinfo-percent-encode-set
pub(crate) const USERINFO: &AsciiSet = &PATH
    .add(b'/')
    .add(b':')
    .add(b';')
    .add(b'=')
    .add(b'@')
    .add(b'[')
    .add(b'\\')
    .add(b']')
    .add(b'^')
    .add(b'|');

/// Encode set for a single path segment: everything in `PATH` plus `/` and
/// `%`, so neither a separator nor a literal percent sign passes through
/// unescaped.
pub(crate) const PATH_SEGMENT: &AsciiSet = &PATH.add(b'/').add(b'%');

// The backslash (\) character is treated as a path separator in special URLs
// so it needs to be additionally escaped in that case.
pub(crate) const SPECIAL_PATH_SEGMENT: &AsciiSet = &PATH_SEGMENT.add(b'\\');

// Encode sets for the query component: `QUERY` is `CONTROLS` plus space, `"`,
// `#`, `<` and `>`; `SPECIAL_QUERY` additionally escapes `'`.
// https://url.spec.whatwg.org/#query-state
const QUERY: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'#').add(b'<').add(b'>');
const SPECIAL_QUERY: &AsciiSet = &QUERY.add(b'\'');
46
/// Shorthand for a `Result` whose error type is [`ParseError`].
pub type ParseResult<T> = Result<T, ParseError>;
48
// Generates the `ParseError` enum from a list of `Variant => "message",`
// pairs, together with a `Display` impl that writes the message paired with
// each variant.
macro_rules! simple_enum_error {
    ($($name: ident => $description: expr,)+) => {
        /// Errors that can occur during parsing.
        ///
        /// This may be extended in the future, so the enum is marked
        /// `#[non_exhaustive]` and cannot be matched exhaustively from
        /// other crates.
        #[derive(PartialEq, Eq, Clone, Copy, Debug)]
        #[non_exhaustive]
        pub enum ParseError {
            $(
                $name,
            )+
        }

        impl fmt::Display for ParseError {
            fn fmt(&self, fmt: &mut Formatter<'_>) -> fmt::Result {
                match *self {
                    $(
                        ParseError::$name => fmt.write_str($description),
                    )+
                }
            }
        }
    }
}
74
// `ParseError` carries no source error; the default `Error` methods suffice.
impl Error for ParseError {}
76
// Instantiates `ParseError`: one variant per line, paired with the message
// its `Display` impl writes.
simple_enum_error! {
    EmptyHost => "empty host",
    IdnaError => "invalid international domain name",
    InvalidPort => "invalid port number",
    InvalidIpv4Address => "invalid IPv4 address",
    InvalidIpv6Address => "invalid IPv6 address",
    InvalidDomainCharacter => "invalid domain character",
    RelativeUrlWithoutBase => "relative URL without a base",
    RelativeUrlWithCannotBeABaseBase => "relative URL with a cannot-be-a-base base",
    SetHostOnCannotBeABaseUrl => "a cannot-be-a-base URL doesn’t have a host to set",
    Overflow => "URLs more than 4 GB are not supported",
}
89
90impl From<::idna::Errors> for ParseError {
91    fn from(_: ::idna::Errors) -> ParseError {
92        ParseError::IdnaError
93    }
94}
95
// Generates the `SyntaxViolation` enum from a list of
// `Variant => "description",` pairs, together with a `description()` method
// returning the text paired with each variant.
macro_rules! syntax_violation_enum {
    ($($name: ident => $description: expr,)+) => {
        /// Non-fatal syntax violations that can occur during parsing.
        ///
        /// This may be extended in the future, so the enum is marked
        /// `#[non_exhaustive]` and cannot be matched exhaustively from
        /// other crates.
        #[derive(PartialEq, Eq, Clone, Copy, Debug)]
        #[non_exhaustive]
        pub enum SyntaxViolation {
            $(
                $name,
            )+
        }

        impl SyntaxViolation {
            /// Returns the static, human-readable description of this violation.
            pub fn description(&self) -> &'static str {
                match *self {
                    $(
                        SyntaxViolation::$name => $description,
                    )+
                }
            }
        }
    }
}
121
// Instantiates `SyntaxViolation`: one variant per entry, paired with the text
// returned by `description()`.
syntax_violation_enum! {
    Backslash => "backslash",
    C0SpaceIgnored =>
        "leading or trailing control or space character are ignored in URLs",
    EmbeddedCredentials =>
        "embedding authentication information (username or password) \
         in an URL is not recommended",
    ExpectedDoubleSlash => "expected //",
    ExpectedFileDoubleSlash => "expected // after file:",
    FileWithHostAndWindowsDrive => "file: with host and Windows drive letter",
    NonUrlCodePoint => "non-URL code point",
    NullInFragment => "NULL characters are ignored in URL fragment identifiers",
    PercentDecode => "expected 2 hex digits after %",
    TabOrNewlineIgnored => "tabs or newlines are ignored in URLs",
    UnencodedAtSign => "unencoded @ sign in username or password",
}
138
139impl fmt::Display for SyntaxViolation {
140    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
141        fmt::Display::fmt(self.description(), f)
142    }
143}
144
/// Classification of a URL scheme, as the parser needs it to choose a state.
///
/// `Debug` is derived so values can be printed and used in `assert_eq!`.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum SchemeType {
    /// The `file` scheme.
    File,
    /// A special scheme other than `file`: `http`, `https`, `ws`, `wss` or `ftp`.
    SpecialNotFile,
    /// Any other scheme.
    NotSpecial,
}

impl SchemeType {
    /// Returns `true` for every variant except `NotSpecial`.
    pub fn is_special(&self) -> bool {
        !matches!(*self, SchemeType::NotSpecial)
    }

    /// Returns `true` only for `File`.
    pub fn is_file(&self) -> bool {
        matches!(*self, SchemeType::File)
    }

    /// Classifies the scheme name `s`.
    ///
    /// The comparison is exact and case-sensitive, so the name must already
    /// be lowercased for a special scheme to be recognised.
    pub fn from(s: &str) -> Self {
        match s {
            "http" | "https" | "ws" | "wss" | "ftp" => SchemeType::SpecialNotFile,
            "file" => SchemeType::File,
            _ => SchemeType::NotSpecial,
        }
    }
}
169
/// Returns the default port of a special scheme, or `None` for any other
/// scheme name (including `file`). The match is exact and case-sensitive.
pub fn default_port(scheme: &str) -> Option<u16> {
    let port = match scheme {
        "ftp" => 21,
        "http" | "ws" => 80,
        "https" | "wss" => 443,
        _ => return None,
    };
    Some(port)
}
178
/// Cursor over the characters of the text being parsed.
///
/// Cloning is how the parser looks ahead: a clone is advanced and either
/// kept or thrown away. Iterating an `Input` skips ASCII tab, line feed and
/// carriage return characters.
#[derive(Clone)]
pub struct Input<'i> {
    // Remaining, not yet consumed, characters of the source string.
    chars: str::Chars<'i>,
}
183
184impl<'i> Input<'i> {
185    pub fn new(input: &'i str) -> Self {
186        Input::with_log(input, None)
187    }
188
189    pub fn no_trim(input: &'i str) -> Self {
190        Input {
191            chars: input.chars(),
192        }
193    }
194
195    pub fn trim_tab_and_newlines(
196        original_input: &'i str,
197        vfn: Option<&dyn Fn(SyntaxViolation)>,
198    ) -> Self {
199        let input = original_input.trim_matches(ascii_tab_or_new_line);
200        if let Some(vfn) = vfn {
201            if input.len() < original_input.len() {
202                vfn(SyntaxViolation::C0SpaceIgnored)
203            }
204            if input.chars().any(|c| matches!(c, '\t' | '\n' | '\r')) {
205                vfn(SyntaxViolation::TabOrNewlineIgnored)
206            }
207        }
208        Input {
209            chars: input.chars(),
210        }
211    }
212
213    pub fn with_log(original_input: &'i str, vfn: Option<&dyn Fn(SyntaxViolation)>) -> Self {
214        let input = original_input.trim_matches(c0_control_or_space);
215        if let Some(vfn) = vfn {
216            if input.len() < original_input.len() {
217                vfn(SyntaxViolation::C0SpaceIgnored)
218            }
219            if input.chars().any(|c| matches!(c, '\t' | '\n' | '\r')) {
220                vfn(SyntaxViolation::TabOrNewlineIgnored)
221            }
222        }
223        Input {
224            chars: input.chars(),
225        }
226    }
227
228    #[inline]
229    pub fn is_empty(&self) -> bool {
230        self.clone().next().is_none()
231    }
232
233    #[inline]
234    fn starts_with<P: Pattern>(&self, p: P) -> bool {
235        p.split_prefix(&mut self.clone())
236    }
237
238    #[inline]
239    pub fn split_prefix<P: Pattern>(&self, p: P) -> Option<Self> {
240        let mut remaining = self.clone();
241        if p.split_prefix(&mut remaining) {
242            Some(remaining)
243        } else {
244            None
245        }
246    }
247
248    #[inline]
249    fn split_first(&self) -> (Option<char>, Self) {
250        let mut remaining = self.clone();
251        (remaining.next(), remaining)
252    }
253
254    #[inline]
255    fn count_matching<F: Fn(char) -> bool>(&self, f: F) -> (u32, Self) {
256        let mut count = 0;
257        let mut remaining = self.clone();
258        loop {
259            let mut input = remaining.clone();
260            if matches!(input.next(), Some(c) if f(c)) {
261                remaining = input;
262                count += 1;
263            } else {
264                return (count, remaining);
265            }
266        }
267    }
268
269    #[inline]
270    fn next_utf8(&mut self) -> Option<(char, &'i str)> {
271        loop {
272            let utf8 = self.chars.as_str();
273            match self.chars.next() {
274                Some(c) => {
275                    if !matches!(c, '\t' | '\n' | '\r') {
276                        return Some((c, &utf8[..c.len_utf8()]));
277                    }
278                }
279                None => return None,
280            }
281        }
282    }
283}
284
/// Something that can be matched against the front of an [`Input`].
pub trait Pattern {
    /// Tries to consume this pattern from the front of `input`, returning
    /// whether it matched. `input` is advanced as a side effect even when the
    /// result is `false`, so callers that need to backtrack pass a clone.
    fn split_prefix(self, input: &mut Input) -> bool;
}
288
289impl Pattern for char {
290    fn split_prefix(self, input: &mut Input) -> bool {
291        input.next() == Some(self)
292    }
293}
294
295impl<'a> Pattern for &'a str {
296    fn split_prefix(self, input: &mut Input) -> bool {
297        for c in self.chars() {
298            if input.next() != Some(c) {
299                return false;
300            }
301        }
302        true
303    }
304}
305
306impl<F: FnMut(char) -> bool> Pattern for F {
307    fn split_prefix(self, input: &mut Input) -> bool {
308        input.next().map_or(false, self)
309    }
310}
311
312impl<'i> Iterator for Input<'i> {
313    type Item = char;
314    fn next(&mut self) -> Option<char> {
315        self.chars
316            .by_ref()
317            .find(|&c| !matches!(c, '\t' | '\n' | '\r'))
318    }
319}
320
/// State of one URL parsing run.
pub struct Parser<'a> {
    /// Output buffer: the serialized URL is built here as parsing proceeds.
    pub serialization: String,
    /// Base URL against which input without a scheme is resolved, if any.
    pub base_url: Option<&'a Url>,
    /// Optional encoding override for the query string.
    // NOTE(review): its use is not visible in this part of the file.
    pub query_encoding_override: EncodingOverride<'a>,
    /// Callback that receives each non-fatal syntax violation, if any.
    pub violation_fn: Option<&'a dyn Fn(SyntaxViolation)>,
    /// Which entry point is driving the parser; see [`Context`].
    pub context: Context,
}
328
/// Which caller is driving the [`Parser`].
///
/// `Debug` is derived so values can be printed and used in `assert_eq!`.
#[derive(PartialEq, Eq, Copy, Clone, Debug)]
pub enum Context {
    /// Presumably a full URL parse — the variant is not constructed in the
    /// visible part of the file.
    UrlParser,
    /// Parsing on behalf of a setter (see `Parser::for_setter`). In this
    /// context `parse_scheme` accepts input that ends before a `:`.
    Setter,
    /// Presumably parsing on behalf of a path-segment setter — not
    /// constructed in the visible part of the file.
    PathSegmentSetter,
}
335
336impl<'a> Parser<'a> {
337    fn log_violation(&self, v: SyntaxViolation) {
338        if let Some(f) = self.violation_fn {
339            f(v)
340        }
341    }
342
343    fn log_violation_if(&self, v: SyntaxViolation, test: impl FnOnce() -> bool) {
344        if let Some(f) = self.violation_fn {
345            if test() {
346                f(v)
347            }
348        }
349    }
350
351    pub fn for_setter(serialization: String) -> Parser<'a> {
352        Parser {
353            serialization,
354            base_url: None,
355            query_encoding_override: None,
356            violation_fn: None,
357            context: Context::Setter,
358        }
359    }
360
361    /// https://url.spec.whatwg.org/#concept-basic-url-parser
362    pub fn parse_url(mut self, input: &str) -> ParseResult<Url> {
363        let input = Input::with_log(input, self.violation_fn);
364        if let Ok(remaining) = self.parse_scheme(input.clone()) {
365            return self.parse_with_scheme(remaining);
366        }
367
368        // No-scheme state
369        if let Some(base_url) = self.base_url {
370            if input.starts_with('#') {
371                self.fragment_only(base_url, input)
372            } else if base_url.cannot_be_a_base() {
373                Err(ParseError::RelativeUrlWithCannotBeABaseBase)
374            } else {
375                let scheme_type = SchemeType::from(base_url.scheme());
376                if scheme_type.is_file() {
377                    self.parse_file(input, scheme_type, Some(base_url))
378                } else {
379                    self.parse_relative(input, scheme_type, base_url)
380                }
381            }
382        } else {
383            Err(ParseError::RelativeUrlWithoutBase)
384        }
385    }
386
387    pub fn parse_scheme<'i>(&mut self, mut input: Input<'i>) -> Result<Input<'i>, ()> {
388        if input.is_empty() || !input.starts_with(ascii_alpha) {
389            return Err(());
390        }
391        debug_assert!(self.serialization.is_empty());
392        while let Some(c) = input.next() {
393            match c {
394                'a'..='z' | 'A'..='Z' | '0'..='9' | '+' | '-' | '.' => {
395                    self.serialization.push(c.to_ascii_lowercase())
396                }
397                ':' => return Ok(input),
398                _ => {
399                    self.serialization.clear();
400                    return Err(());
401                }
402            }
403        }
404        // EOF before ':'
405        if self.context == Context::Setter {
406            Ok(input)
407        } else {
408            self.serialization.clear();
409            Err(())
410        }
411    }
412
413    fn parse_with_scheme(mut self, input: Input<'_>) -> ParseResult<Url> {
414        use crate::SyntaxViolation::{ExpectedDoubleSlash, ExpectedFileDoubleSlash};
415        let scheme_end = to_u32(self.serialization.len())?;
416        let scheme_type = SchemeType::from(&self.serialization);
417        self.serialization.push(':');
418        match scheme_type {
419            SchemeType::File => {
420                self.log_violation_if(ExpectedFileDoubleSlash, || !input.starts_with("//"));
421                let base_file_url = self.base_url.and_then(|base| {
422                    if base.scheme() == "file" {
423                        Some(base)
424                    } else {
425                        None
426                    }
427                });
428                self.serialization.clear();
429                self.parse_file(input, scheme_type, base_file_url)
430            }
431            SchemeType::SpecialNotFile => {
432                // special relative or authority state
433                let (slashes_count, remaining) = input.count_matching(|c| matches!(c, '/' | '\\'));
434                if let Some(base_url) = self.base_url {
435                    if slashes_count < 2
436                        && base_url.scheme() == &self.serialization[..scheme_end as usize]
437                    {
438                        // "Cannot-be-a-base" URLs only happen with "not special" schemes.
439                        debug_assert!(!base_url.cannot_be_a_base());
440                        self.serialization.clear();
441                        return self.parse_relative(input, scheme_type, base_url);
442                    }
443                }
444                // special authority slashes state
445                self.log_violation_if(ExpectedDoubleSlash, || {
446                    input
447                        .clone()
448                        .take_while(|&c| matches!(c, '/' | '\\'))
449                        .collect::<String>()
450                        != "//"
451                });
452                self.after_double_slash(remaining, scheme_type, scheme_end)
453            }
454            SchemeType::NotSpecial => self.parse_non_special(input, scheme_type, scheme_end),
455        }
456    }
457
458    /// Scheme other than file, http, https, ws, ws, ftp.
459    fn parse_non_special(
460        mut self,
461        input: Input<'_>,
462        scheme_type: SchemeType,
463        scheme_end: u32,
464    ) -> ParseResult<Url> {
465        // path or authority state (
466        if let Some(input) = input.split_prefix("//") {
467            return self.after_double_slash(input, scheme_type, scheme_end);
468        }
469        // Anarchist URL (no authority)
470        let path_start = to_u32(self.serialization.len())?;
471        let username_end = path_start;
472        let host_start = path_start;
473        let host_end = path_start;
474        let host = HostInternal::None;
475        let port = None;
476        let remaining = if let Some(input) = input.split_prefix('/') {
477            let path_start = self.serialization.len();
478            self.serialization.push('/');
479            self.parse_path(scheme_type, &mut false, path_start, input)
480        } else {
481            self.parse_cannot_be_a_base_path(input)
482        };
483        self.with_query_and_fragment(
484            scheme_type,
485            scheme_end,
486            username_end,
487            host_start,
488            host_end,
489            host,
490            port,
491            path_start,
492            remaining,
493        )
494    }
495
    /// Parses the part of a `file` URL that follows the scheme.
    ///
    /// `self.serialization` must be empty on entry; each branch below writes
    /// the `file:` prefix itself or copies it from the base URL.
    /// `base_file_url` is the base URL to resolve against, if there is one.
    ///
    /// The branches are, in order: input starting with two slashes (host
    /// follows), input starting with one slash (absolute path), and
    /// everything else, which is resolved against the base URL when present.
    fn parse_file(
        mut self,
        input: Input<'_>,
        scheme_type: SchemeType,
        base_file_url: Option<&Url>,
    ) -> ParseResult<Url> {
        use crate::SyntaxViolation::Backslash;
        // file state
        debug_assert!(self.serialization.is_empty());
        let (first_char, input_after_first_char) = input.split_first();
        if matches!(first_char, Some('/') | Some('\\')) {
            self.log_violation_if(SyntaxViolation::Backslash, || first_char == Some('\\'));
            // file slash state
            let (next_char, input_after_next_char) = input_after_first_char.split_first();
            if matches!(next_char, Some('/') | Some('\\')) {
                self.log_violation_if(Backslash, || next_char == Some('\\'));
                // file host state
                self.serialization.push_str("file://");
                let scheme_end = "file".len() as u32;
                let host_start = "file://".len() as u32;
                // NOTE(review): `path_start` is a bool here, not an offset; it
                // selects which path entry point is used below.
                let (path_start, mut host, remaining) =
                    self.parse_file_host(input_after_next_char)?;
                let mut host_end = to_u32(self.serialization.len())?;
                // Passed by `&mut` to the path parser, which may clear it;
                // see the check after the path has been parsed.
                let mut has_host = !matches!(host, HostInternal::None);
                let remaining = if path_start {
                    self.parse_path_start(SchemeType::File, &mut has_host, remaining)
                } else {
                    let path_start = self.serialization.len();
                    self.serialization.push('/');
                    self.parse_path(SchemeType::File, &mut has_host, path_start, remaining)
                };

                // For file URLs that have a host and whose path starts
                // with the windows drive letter we just remove the host.
                if !has_host {
                    self.serialization
                        .drain(host_start as usize..host_end as usize);
                    host_end = host_start;
                    host = HostInternal::None;
                }
                let (query_start, fragment_start) =
                    self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
                return Ok(Url {
                    serialization: self.serialization,
                    scheme_end,
                    username_end: host_start,
                    host_start,
                    host_end,
                    host,
                    port: None,
                    path_start: host_end,
                    query_start,
                    fragment_start,
                });
            } else {
                // Exactly one leading slash: an absolute path. Unless the
                // input starts with a Windows drive letter, the drive letter
                // or (failing that) the host of the base URL is carried over.
                self.serialization.push_str("file://");
                let scheme_end = "file".len() as u32;
                let host_start = "file://".len();
                let mut host_end = host_start;
                let mut host = HostInternal::None;
                if !starts_with_windows_drive_letter_segment(&input_after_first_char) {
                    if let Some(base_url) = base_file_url {
                        let first_segment = base_url.path_segments().unwrap().next().unwrap();
                        if is_normalized_windows_drive_letter(first_segment) {
                            self.serialization.push('/');
                            self.serialization.push_str(first_segment);
                        } else if let Some(host_str) = base_url.host_str() {
                            self.serialization.push_str(host_str);
                            host_end = self.serialization.len();
                            host = base_url.host;
                        }
                    }
                }
                // If c is the EOF code point, U+002F (/), U+005C (\), U+003F (?), or U+0023 (#), then decrease pointer by one
                let parse_path_input = if let Some(c) = first_char {
                    if c == '/' || c == '\\' || c == '?' || c == '#' {
                        input
                    } else {
                        input_after_first_char
                    }
                } else {
                    input_after_first_char
                };

                let remaining =
                    self.parse_path(SchemeType::File, &mut false, host_end, parse_path_input);

                let host_start = host_start as u32;

                let (query_start, fragment_start) =
                    self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;

                let host_end = host_end as u32;
                return Ok(Url {
                    serialization: self.serialization,
                    scheme_end,
                    username_end: host_start,
                    host_start,
                    host_end,
                    host,
                    port: None,
                    path_start: host_end,
                    query_start,
                    fragment_start,
                });
            }
        }
        // The input does not start with a slash or backslash.
        if let Some(base_url) = base_file_url {
            match first_char {
                None => {
                    // Copy everything except the fragment
                    let before_fragment = match base_url.fragment_start {
                        Some(i) => &base_url.serialization[..i as usize],
                        None => &*base_url.serialization,
                    };
                    self.serialization.push_str(before_fragment);
                    Ok(Url {
                        serialization: self.serialization,
                        fragment_start: None,
                        ..*base_url
                    })
                }
                Some('?') => {
                    // Copy everything up to the query string
                    let before_query = match (base_url.query_start, base_url.fragment_start) {
                        (None, None) => &*base_url.serialization,
                        (Some(i), _) | (None, Some(i)) => base_url.slice(..i),
                    };
                    self.serialization.push_str(before_query);
                    let (query_start, fragment_start) =
                        self.parse_query_and_fragment(scheme_type, base_url.scheme_end, input)?;
                    Ok(Url {
                        serialization: self.serialization,
                        query_start,
                        fragment_start,
                        ..*base_url
                    })
                }
                Some('#') => self.fragment_only(base_url, input),
                _ => {
                    if !starts_with_windows_drive_letter_segment(&input) {
                        // Relative path: start from the base URL up to its
                        // query, shorten its path, then parse the input as
                        // a continuation of that path.
                        let before_query = match (base_url.query_start, base_url.fragment_start) {
                            (None, None) => &*base_url.serialization,
                            (Some(i), _) | (None, Some(i)) => base_url.slice(..i),
                        };
                        self.serialization.push_str(before_query);
                        self.shorten_path(SchemeType::File, base_url.path_start as usize);
                        let remaining = self.parse_path(
                            SchemeType::File,
                            &mut true,
                            base_url.path_start as usize,
                            input,
                        );
                        self.with_query_and_fragment(
                            SchemeType::File,
                            base_url.scheme_end,
                            base_url.username_end,
                            base_url.host_start,
                            base_url.host_end,
                            base_url.host,
                            base_url.port,
                            base_url.path_start,
                            remaining,
                        )
                    } else {
                        // The input starts with a Windows drive letter: the
                        // base URL is ignored and the result has no host.
                        self.serialization.push_str("file:///");
                        let scheme_end = "file".len() as u32;
                        let path_start = "file://".len();
                        let remaining =
                            self.parse_path(SchemeType::File, &mut false, path_start, input);
                        let (query_start, fragment_start) =
                            self.parse_query_and_fragment(SchemeType::File, scheme_end, remaining)?;
                        let path_start = path_start as u32;
                        Ok(Url {
                            serialization: self.serialization,
                            scheme_end,
                            username_end: path_start,
                            host_start: path_start,
                            host_end: path_start,
                            host: HostInternal::None,
                            port: None,
                            path_start,
                            query_start,
                            fragment_start,
                        })
                    }
                }
            }
        } else {
            // No base URL: the whole input is a path on an empty host.
            self.serialization.push_str("file:///");
            let scheme_end = "file".len() as u32;
            let path_start = "file://".len();
            let remaining = self.parse_path(SchemeType::File, &mut false, path_start, input);
            let (query_start, fragment_start) =
                self.parse_query_and_fragment(SchemeType::File, scheme_end, remaining)?;
            let path_start = path_start as u32;
            Ok(Url {
                serialization: self.serialization,
                scheme_end,
                username_end: path_start,
                host_start: path_start,
                host_end: path_start,
                host: HostInternal::None,
                port: None,
                path_start,
                query_start,
                fragment_start,
            })
        }
    }
706
    /// Resolves `input` against `base_url` for a non-`file` scheme.
    ///
    /// `self.serialization` must be empty on entry. Depending on the first
    /// character of the input, the result reuses the base URL up to its
    /// fragment (empty input), up to its query (`?`), up to its authority
    /// (a single slash) or only its scheme (two or more slashes); any other
    /// input is parsed as a path relative to the base URL's path.
    fn parse_relative(
        mut self,
        input: Input<'_>,
        scheme_type: SchemeType,
        base_url: &Url,
    ) -> ParseResult<Url> {
        // relative state
        debug_assert!(self.serialization.is_empty());
        let (first_char, input_after_first_char) = input.split_first();
        match first_char {
            None => {
                // Copy everything except the fragment
                let before_fragment = match base_url.fragment_start {
                    Some(i) => &base_url.serialization[..i as usize],
                    None => &*base_url.serialization,
                };
                self.serialization.push_str(before_fragment);
                Ok(Url {
                    serialization: self.serialization,
                    fragment_start: None,
                    ..*base_url
                })
            }
            Some('?') => {
                // Copy everything up to the query string
                let before_query = match (base_url.query_start, base_url.fragment_start) {
                    (None, None) => &*base_url.serialization,
                    (Some(i), _) | (None, Some(i)) => base_url.slice(..i),
                };
                self.serialization.push_str(before_query);
                let (query_start, fragment_start) =
                    self.parse_query_and_fragment(scheme_type, base_url.scheme_end, input)?;
                Ok(Url {
                    serialization: self.serialization,
                    query_start,
                    fragment_start,
                    ..*base_url
                })
            }
            Some('#') => self.fragment_only(base_url, input),
            Some('/') | Some('\\') => {
                let (slashes_count, remaining) = input.count_matching(|c| matches!(c, '/' | '\\'));
                if slashes_count >= 2 {
                    // Two or more slashes: only the scheme of the base URL is
                    // kept and an authority is parsed from the input.
                    self.log_violation_if(SyntaxViolation::ExpectedDoubleSlash, || {
                        input
                            .clone()
                            .take_while(|&c| matches!(c, '/' | '\\'))
                            .collect::<String>()
                            != "//"
                    });
                    let scheme_end = base_url.scheme_end;
                    debug_assert!(base_url.byte_at(scheme_end) == b':');
                    self.serialization
                        .push_str(base_url.slice(..scheme_end + 1));
                    // A literal "//" prefix consumes exactly two slashes, so
                    // any further slashes stay in the input; otherwise the
                    // input after the whole run of slashes is used.
                    if let Some(after_prefix) = input.split_prefix("//") {
                        return self.after_double_slash(after_prefix, scheme_type, scheme_end);
                    }
                    return self.after_double_slash(remaining, scheme_type, scheme_end);
                }
                // A single slash: keep the base URL up to the start of its
                // path and parse the input as a new absolute path.
                let path_start = base_url.path_start;
                self.serialization.push_str(base_url.slice(..path_start));
                self.serialization.push('/');
                let remaining = self.parse_path(
                    scheme_type,
                    &mut true,
                    path_start as usize,
                    input_after_first_char,
                );
                self.with_query_and_fragment(
                    scheme_type,
                    base_url.scheme_end,
                    base_url.username_end,
                    base_url.host_start,
                    base_url.host_end,
                    base_url.host,
                    base_url.port,
                    base_url.path_start,
                    remaining,
                )
            }
            _ => {
                // Relative path: start from the base URL up to its query.
                let before_query = match (base_url.query_start, base_url.fragment_start) {
                    (None, None) => &*base_url.serialization,
                    (Some(i), _) | (None, Some(i)) => base_url.slice(..i),
                };
                self.serialization.push_str(before_query);
                // FIXME spec says just "remove last entry", not the "pop" algorithm
                self.pop_path(scheme_type, base_url.path_start as usize);
                // A special url always has a path.
                // A path always starts with '/'
                if self.serialization.len() == base_url.path_start as usize
                    && (SchemeType::from(base_url.scheme()).is_special() || !input.is_empty())
                {
                    self.serialization.push('/');
                }
                let remaining = match input.split_first() {
                    (Some('/'), remaining) => self.parse_path(
                        scheme_type,
                        &mut true,
                        base_url.path_start as usize,
                        remaining,
                    ),
                    _ => {
                        self.parse_path(scheme_type, &mut true, base_url.path_start as usize, input)
                    }
                };
                self.with_query_and_fragment(
                    scheme_type,
                    base_url.scheme_end,
                    base_url.username_end,
                    base_url.host_start,
                    base_url.host_end,
                    base_url.host,
                    base_url.port,
                    base_url.path_start,
                    remaining,
                )
            }
        }
    }
827
828    fn after_double_slash(
829        mut self,
830        input: Input<'_>,
831        scheme_type: SchemeType,
832        scheme_end: u32,
833    ) -> ParseResult<Url> {
834        self.serialization.push('/');
835        self.serialization.push('/');
836        // authority state
837        let before_authority = self.serialization.len();
838        let (username_end, remaining) = self.parse_userinfo(input, scheme_type)?;
839        let has_authority = before_authority != self.serialization.len();
840        // host state
841        let host_start = to_u32(self.serialization.len())?;
842        let (host_end, host, port, remaining) =
843            self.parse_host_and_port(remaining, scheme_end, scheme_type)?;
844        if host == HostInternal::None && has_authority {
845            return Err(ParseError::EmptyHost);
846        }
847        // path state
848        let path_start = to_u32(self.serialization.len())?;
849        let remaining = self.parse_path_start(scheme_type, &mut true, remaining);
850        self.with_query_and_fragment(
851            scheme_type,
852            scheme_end,
853            username_end,
854            host_start,
855            host_end,
856            host,
857            port,
858            path_start,
859            remaining,
860        )
861    }
862
    /// Parses the optional `username[:password]@` prefix of an authority and
    /// appends its percent-encoded form to `self.serialization`.
    ///
    /// Return (username_end, remaining)
    ///
    /// `username_end` is the serialization offset where the username ends, and
    /// `remaining` is the input that follows the userinfo. When the input holds
    /// no `@` before the end of the authority, nothing is written and the
    /// original `input` is returned unchanged.
    ///
    /// # Errors
    ///
    /// * `ParseError::EmptyHost` when the authority starts with `@` and that `@`
    ///   is immediately followed by `/`, `?`, `#` (or `\` for special schemes).
    /// * Whatever `to_u32` reports when the serialization length does not fit
    ///   in a `u32`.
    fn parse_userinfo<'i>(
        &mut self,
        mut input: Input<'i>,
        scheme_type: SchemeType,
    ) -> ParseResult<(u32, Input<'i>)> {
        // First pass: look ahead on a copy of the input for the *last* '@'
        // before the authority ends. For that '@' we remember how many
        // characters precede it and the input positioned just after it.
        let mut last_at = None;
        let mut remaining = input.clone();
        let mut char_count = 0;
        while let Some(c) = remaining.next() {
            match c {
                '@' => {
                    // The first '@' means credentials are embedded; any later
                    // '@' means an earlier one should have been percent-encoded.
                    if last_at.is_some() {
                        self.log_violation(SyntaxViolation::UnencodedAtSign)
                    } else {
                        self.log_violation(SyntaxViolation::EmbeddedCredentials)
                    }
                    last_at = Some((char_count, remaining.clone()))
                }
                // End of the authority.
                '/' | '?' | '#' => break,
                '\\' if scheme_type.is_special() => break,
                _ => (),
            }
            char_count += 1;
        }
        let (mut userinfo_char_count, remaining) = match last_at {
            // No '@' at all: there is no userinfo and `input` is left untouched.
            None => return Ok((to_u32(self.serialization.len())?, input)),
            // The '@' is the very first character: the userinfo is empty.
            Some((0, remaining)) => {
                // Otherwise, if one of the following is true
                // c is the EOF code point, U+002F (/), U+003F (?), or U+0023 (#)
                // url is special and c is U+005C (\)
                // If @ flag is set and buffer is the empty string, validation error, return failure.
                // NOTE(review): only the delimiter cases are rejected here; an
                // end of input right after '@' falls through to the `Ok` below.
                if let (Some(c), _) = remaining.split_first() {
                    if c == '/' || c == '?' || c == '#' || (scheme_type.is_special() && c == '\\') {
                        return Err(ParseError::EmptyHost);
                    }
                }
                return Ok((to_u32(self.serialization.len())?, remaining));
            }
            Some(x) => x,
        };

        // Second pass: consume exactly the characters that precede the last
        // '@' from the real input, serializing them as username and password.
        let mut username_end = None;
        let mut has_password = false;
        let mut has_username = false;
        while userinfo_char_count > 0 {
            // Cannot fail: the first pass already iterated over these characters.
            let (c, utf8_c) = input.next_utf8().unwrap();
            userinfo_char_count -= 1;
            if c == ':' && username_end.is_none() {
                // Start parsing password
                username_end = Some(to_u32(self.serialization.len())?);
                // We don't add a colon if the password is empty
                if userinfo_char_count > 0 {
                    self.serialization.push(':');
                    has_password = true;
                }
            } else {
                // Any character seen before the password separator was written
                // belongs to the username.
                if !has_password {
                    has_username = true;
                }
                self.check_url_code_point(c, &input);
                self.serialization
                    .extend(utf8_percent_encode(utf8_c, USERINFO));
            }
        }
        // Without a ':' the username extends to everything written so far.
        let username_end = match username_end {
            Some(i) => i,
            None => to_u32(self.serialization.len())?,
        };
        // Only emit the '@' separator when some credentials were serialized.
        if has_username || has_password {
            self.serialization.push('@');
        }
        Ok((username_end, remaining))
    }
937
938    fn parse_host_and_port<'i>(
939        &mut self,
940        input: Input<'i>,
941        scheme_end: u32,
942        scheme_type: SchemeType,
943    ) -> ParseResult<(u32, HostInternal, Option<u16>, Input<'i>)> {
944        let (host, remaining) = Parser::parse_host(input, scheme_type)?;
945        write!(&mut self.serialization, "{}", host).unwrap();
946        let host_end = to_u32(self.serialization.len())?;
947        if let Host::Domain(h) = &host {
948            if h.is_empty() {
949                // Port with an empty host
950                if remaining.starts_with(":") {
951                    return Err(ParseError::EmptyHost);
952                }
953                if scheme_type.is_special() {
954                    return Err(ParseError::EmptyHost);
955                }
956            }
957        };
958
959        let (port, remaining) = if let Some(remaining) = remaining.split_prefix(':') {
960            let scheme = || default_port(&self.serialization[..scheme_end as usize]);
961            Parser::parse_port(remaining, scheme, self.context)?
962        } else {
963            (None, remaining)
964        };
965        if let Some(port) = port {
966            write!(&mut self.serialization, ":{}", port).unwrap()
967        }
968        Ok((host_end, host.into(), port, remaining))
969    }
970
    /// Parses a host from the front of `input`.
    ///
    /// `file` URLs are delegated to `Parser::get_file_host`. For every other
    /// scheme the host ends at the first `:` outside square brackets, at `/`,
    /// `?` or `#`, or at `\` when the scheme is special. Non-special schemes
    /// are parsed with `Host::parse_opaque`, special ones with `Host::parse`.
    ///
    /// Returns the parsed host together with the input positioned just after
    /// the host text.
    ///
    /// # Errors
    ///
    /// `ParseError::EmptyHost` when the scheme is `SchemeType::SpecialNotFile`
    /// and the host text is empty; otherwise any error from the host parsers.
    pub fn parse_host(
        mut input: Input<'_>,
        scheme_type: SchemeType,
    ) -> ParseResult<(Host<String>, Input<'_>)> {
        if scheme_type.is_file() {
            return Parser::get_file_host(input);
        }
        // Undo the Input abstraction here to avoid allocating in the common case
        // where the host part of the input does not contain any tab or newline
        let input_str = input.chars.as_str();
        let mut inside_square_brackets = false;
        let mut has_ignored_chars = false;
        // Number of host characters excluding tab/newline/CR; presumably the
        // number of items the `Input` iterator yields for the host — TODO confirm.
        let mut non_ignored_chars = 0;
        // Byte length of the host text in `input_str`, ignored characters included.
        let mut bytes = 0;
        for c in input_str.chars() {
            match c {
                // A ':' inside "[...]" does not end the host.
                ':' if !inside_square_brackets => break,
                '\\' if scheme_type.is_special() => break,
                '/' | '?' | '#' => break,
                '\t' | '\n' | '\r' => {
                    has_ignored_chars = true;
                }
                '[' => {
                    inside_square_brackets = true;
                    non_ignored_chars += 1
                }
                ']' => {
                    inside_square_brackets = false;
                    non_ignored_chars += 1
                }
                _ => non_ignored_chars += 1,
            }
            bytes += c.len_utf8();
        }
        // `replaced` owns the collected copy only when one is needed;
        // `host_str` borrows either from it or directly from `input_str`.
        let replaced: String;
        let host_str;
        {
            // Advance `input` past the host in both branches.
            let host_input = input.by_ref().take(non_ignored_chars);
            if has_ignored_chars {
                replaced = host_input.collect();
                host_str = &*replaced
            } else {
                // Nothing to strip: drain the iterator and borrow the raw slice.
                for _ in host_input {}
                host_str = &input_str[..bytes]
            }
        }
        if scheme_type == SchemeType::SpecialNotFile && host_str.is_empty() {
            return Err(ParseError::EmptyHost);
        }
        if !scheme_type.is_special() {
            let host = Host::parse_opaque(host_str)?;
            return Ok((host, input));
        }
        let host = Host::parse(host_str)?;
        Ok((host, input))
    }
1027
1028    fn get_file_host(input: Input<'_>) -> ParseResult<(Host<String>, Input<'_>)> {
1029        let (_, host_str, remaining) = Parser::file_host(input)?;
1030        let host = match Host::parse(&host_str)? {
1031            Host::Domain(ref d) if d == "localhost" => Host::Domain("".to_string()),
1032            host => host,
1033        };
1034        Ok((host, remaining))
1035    }
1036
1037    fn parse_file_host<'i>(
1038        &mut self,
1039        input: Input<'i>,
1040    ) -> ParseResult<(bool, HostInternal, Input<'i>)> {
1041        let has_host;
1042        let (_, host_str, remaining) = Parser::file_host(input)?;
1043        let host = if host_str.is_empty() {
1044            has_host = false;
1045            HostInternal::None
1046        } else {
1047            match Host::parse(&host_str)? {
1048                Host::Domain(ref d) if d == "localhost" => {
1049                    has_host = false;
1050                    HostInternal::None
1051                }
1052                host => {
1053                    write!(&mut self.serialization, "{}", host).unwrap();
1054                    has_host = true;
1055                    host.into()
1056                }
1057            }
1058        };
1059        Ok((has_host, host, remaining))
1060    }
1061
    /// Extracts the raw (unparsed) host text of a `file` URL from the front of
    /// `input`.
    ///
    /// The host text ends at the first `/`, `\`, `?` or `#`, or at the end of
    /// the input; tabs, newlines and carriage returns inside it are dropped.
    ///
    /// Returns `(true, host_text, remaining)` in the general case. When the
    /// host text is a Windows drive letter (as decided by
    /// `is_windows_drive_letter`), it is not a host at all: the function returns
    /// `(false, "", input)` with the original, unconsumed input.
    pub fn file_host(input: Input) -> ParseResult<(bool, String, Input)> {
        // Undo the Input abstraction here to avoid allocating in the common case
        // where the host part of the input does not contain any tab or newline
        let input_str = input.chars.as_str();
        let mut has_ignored_chars = false;
        // Number of host characters excluding tab/newline/CR; presumably the
        // number of items the `Input` iterator yields for the host — TODO confirm.
        let mut non_ignored_chars = 0;
        // Byte length of the host text in `input_str`, ignored characters included.
        let mut bytes = 0;
        for c in input_str.chars() {
            match c {
                '/' | '\\' | '?' | '#' => break,
                '\t' | '\n' | '\r' => has_ignored_chars = true,
                _ => non_ignored_chars += 1,
            }
            bytes += c.len_utf8();
        }
        // `replaced` owns the collected copy only when one is needed;
        // `host_str` borrows either from it or directly from `input_str`.
        let replaced: String;
        let host_str;
        // Work on a clone so that `input` can still be handed back untouched.
        let mut remaining = input.clone();
        {
            let host_input = remaining.by_ref().take(non_ignored_chars);
            if has_ignored_chars {
                replaced = host_input.collect();
                host_str = &*replaced
            } else {
                // Nothing to strip: drain the iterator and borrow the raw slice.
                for _ in host_input {}
                host_str = &input_str[..bytes]
            }
        }
        // A drive letter belongs to the path, so nothing is consumed as a host.
        if is_windows_drive_letter(host_str) {
            return Ok((false, "".to_string(), input));
        }
        Ok((true, host_str.to_string(), remaining))
    }
1095
1096    pub fn parse_port<P>(
1097        mut input: Input<'_>,
1098        default_port: P,
1099        context: Context,
1100    ) -> ParseResult<(Option<u16>, Input<'_>)>
1101    where
1102        P: Fn() -> Option<u16>,
1103    {
1104        let mut port: u32 = 0;
1105        let mut has_any_digit = false;
1106        while let (Some(c), remaining) = input.split_first() {
1107            if let Some(digit) = c.to_digit(10) {
1108                port = port * 10 + digit;
1109                if port > ::std::u16::MAX as u32 {
1110                    return Err(ParseError::InvalidPort);
1111                }
1112                has_any_digit = true;
1113            } else if context == Context::UrlParser && !matches!(c, '/' | '\\' | '?' | '#') {
1114                return Err(ParseError::InvalidPort);
1115            } else {
1116                break;
1117            }
1118            input = remaining;
1119        }
1120        let mut opt_port = Some(port as u16);
1121        if !has_any_digit || opt_port == default_port() {
1122            opt_port = None;
1123        }
1124        Ok((opt_port, input))
1125    }
1126
1127    pub fn parse_path_start<'i>(
1128        &mut self,
1129        scheme_type: SchemeType,
1130        has_host: &mut bool,
1131        input: Input<'i>,
1132    ) -> Input<'i> {
1133        let path_start = self.serialization.len();
1134        let (maybe_c, remaining) = input.split_first();
1135        // If url is special, then:
1136        if scheme_type.is_special() {
1137            if maybe_c == Some('\\') {
1138                // If c is U+005C (\), validation error.
1139                self.log_violation(SyntaxViolation::Backslash);
1140            }
1141            // A special URL always has a non-empty path.
1142            if !self.serialization.ends_with('/') {
1143                self.serialization.push('/');
1144                // We have already made sure the forward slash is present.
1145                if maybe_c == Some('/') || maybe_c == Some('\\') {
1146                    return self.parse_path(scheme_type, has_host, path_start, remaining);
1147                }
1148            }
1149            return self.parse_path(scheme_type, has_host, path_start, input);
1150        } else if maybe_c == Some('?') || maybe_c == Some('#') {
1151            // Otherwise, if state override is not given and c is U+003F (?),
1152            // set url’s query to the empty string and state to query state.
1153            // Otherwise, if state override is not given and c is U+0023 (#),
1154            // set url’s fragment to the empty string and state to fragment state.
1155            // The query and path states will be handled by the caller.
1156            return input;
1157        }
1158
1159        if maybe_c != None && maybe_c != Some('/') {
1160            self.serialization.push('/');
1161        }
1162        // Otherwise, if c is not the EOF code point:
1163        self.parse_path(scheme_type, has_host, path_start, input)
1164    }
1165
    /// Parses path segments from `input`, appending their percent-encoded form
    /// to `self.serialization`, and returns the unconsumed input.
    ///
    /// * `path_start` is the serialization offset where the URL's path begins;
    ///   dot-segment handling never shortens the path beyond it.
    /// * `has_host` is cleared when a `file` URL turns out to carry a Windows
    ///   drive letter as a path segment.
    ///
    /// Segments are separated by `/` (and by `\` for special schemes, which is
    /// logged as a violation), except in the `Context::PathSegmentSetter`
    /// context where those characters are percent-encoded like any other.
    /// In the `Context::UrlParser` context parsing stops in front of `?` or `#`.
    /// Single-dot and double-dot segments (including their `%2e` spellings) are
    /// resolved as each segment completes.
    pub fn parse_path<'i>(
        &mut self,
        scheme_type: SchemeType,
        has_host: &mut bool,
        path_start: usize,
        mut input: Input<'i>,
    ) -> Input<'i> {
        // Relative path state
        // Each iteration of the outer loop handles one path segment.
        loop {
            let segment_start = self.serialization.len();
            let mut ends_with_slash = false;
            // Inner loop: copy one segment, up to and including its separator.
            loop {
                let input_before_c = input.clone();
                let (c, utf8_c) = if let Some(x) = input.next_utf8() {
                    x
                } else {
                    break;
                };
                match c {
                    '/' if self.context != Context::PathSegmentSetter => {
                        self.serialization.push(c);
                        ends_with_slash = true;
                        break;
                    }
                    '\\' if self.context != Context::PathSegmentSetter
                        && scheme_type.is_special() =>
                    {
                        // Special URLs treat '\' as a separator and normalize it to '/'.
                        self.log_violation(SyntaxViolation::Backslash);
                        self.serialization.push('/');
                        ends_with_slash = true;
                        break;
                    }
                    '?' | '#' if self.context == Context::UrlParser => {
                        // Un-read the delimiter so the caller sees it.
                        input = input_before_c;
                        break;
                    }
                    _ => {
                        self.check_url_code_point(c, &input);
                        if self.context == Context::PathSegmentSetter {
                            // A single segment is being set, so separators must
                            // be escaped as well.
                            if scheme_type.is_special() {
                                self.serialization
                                    .extend(utf8_percent_encode(utf8_c, SPECIAL_PATH_SEGMENT));
                            } else {
                                self.serialization
                                    .extend(utf8_percent_encode(utf8_c, PATH_SEGMENT));
                            }
                        } else {
                            self.serialization.extend(utf8_percent_encode(utf8_c, PATH));
                        }
                    }
                }
            }
            // The segment just written, without its trailing separator.
            let segment_before_slash = if ends_with_slash {
                &self.serialization[segment_start..self.serialization.len() - 1]
            } else {
                &self.serialization[segment_start..self.serialization.len()]
            };
            match segment_before_slash {
                // If buffer is a double-dot path segment, shorten url’s path,
                ".." | "%2e%2e" | "%2e%2E" | "%2E%2e" | "%2E%2E" | "%2e." | "%2E." | ".%2e"
                | ".%2E" => {
                    debug_assert!(self.serialization.as_bytes()[segment_start - 1] == b'/');
                    // Drop the ".." itself ...
                    self.serialization.truncate(segment_start);
                    // ... then the slash in front of it, unless that slash must
                    // stay (see `last_slash_can_be_removed`) ...
                    if self.serialization.ends_with('/')
                        && Parser::last_slash_can_be_removed(&self.serialization, path_start)
                    {
                        self.serialization.pop();
                    }
                    // ... and finally the parent segment.
                    self.shorten_path(scheme_type, path_start);

                    // and then if neither c is U+002F (/), nor url is special and c is U+005C (\), append the empty string to url’s path.
                    if ends_with_slash && !self.serialization.ends_with('/') {
                        self.serialization.push('/');
                    }
                }
                // Otherwise, if buffer is a single-dot path segment and if neither c is U+002F (/),
                // nor url is special and c is U+005C (\), append the empty string to url’s path.
                "." | "%2e" | "%2E" => {
                    self.serialization.truncate(segment_start);
                    if !self.serialization.ends_with('/') {
                        self.serialization.push('/');
                    }
                }
                _ => {
                    // If url’s scheme is "file", url’s path is empty, and buffer is a Windows drive letter, then
                    // NOTE(review): the "path is empty" condition is not checked
                    // here; any drive-letter segment of a file URL matches.
                    if scheme_type.is_file() && is_windows_drive_letter(segment_before_slash) {
                        // Replace the second code point in buffer with U+003A (:).
                        if let Some(c) = segment_before_slash.chars().next() {
                            self.serialization.truncate(segment_start);
                            self.serialization.push(c);
                            self.serialization.push(':');
                            if ends_with_slash {
                                self.serialization.push('/');
                            }
                        }
                        // If url’s host is neither the empty string nor null,
                        // validation error, set url’s host to the empty string.
                        if *has_host {
                            self.log_violation(SyntaxViolation::FileWithHostAndWindowsDrive);
                            *has_host = false; // FIXME account for this in callers
                        }
                    }
                }
            }
            // Without a trailing separator this was the last segment.
            if !ends_with_slash {
                break;
            }
        }
        if scheme_type.is_file() {
            // while url’s path’s size is greater than 1
            // and url’s path[0] is the empty string,
            // validation error, remove the first item from url’s path.
            //FIXME: log violation
            // Collapse every leading '/' of the path into a single one.
            let path = self.serialization.split_off(path_start);
            self.serialization.push('/');
            self.serialization.push_str(path.trim_start_matches('/'));
        }

        input
    }
1286
1287    fn last_slash_can_be_removed(serialization: &str, path_start: usize) -> bool {
1288        let url_before_segment = &serialization[..serialization.len() - 1];
1289        if let Some(segment_before_start) = url_before_segment.rfind('/') {
1290            // Do not remove the root slash
1291            segment_before_start >= path_start
1292                // Or a windows drive letter slash
1293                && !path_starts_with_windows_drive_letter(&serialization[segment_before_start..])
1294        } else {
1295            false
1296        }
1297    }
1298
1299    /// https://url.spec.whatwg.org/#shorten-a-urls-path
1300    fn shorten_path(&mut self, scheme_type: SchemeType, path_start: usize) {
1301        // If path is empty, then return.
1302        if self.serialization.len() == path_start {
1303            return;
1304        }
1305        // If url’s scheme is "file", path’s size is 1, and path[0] is a normalized Windows drive letter, then return.
1306        if scheme_type.is_file()
1307            && is_normalized_windows_drive_letter(&self.serialization[path_start..])
1308        {
1309            return;
1310        }
1311        // Remove path’s last item.
1312        self.pop_path(scheme_type, path_start);
1313    }
1314
1315    /// https://url.spec.whatwg.org/#pop-a-urls-path
1316    fn pop_path(&mut self, scheme_type: SchemeType, path_start: usize) {
1317        if self.serialization.len() > path_start {
1318            let slash_position = self.serialization[path_start..].rfind('/').unwrap();
1319            // + 1 since rfind returns the position before the slash.
1320            let segment_start = path_start + slash_position + 1;
1321            // Don’t pop a Windows drive letter
1322            if !(scheme_type.is_file()
1323                && is_normalized_windows_drive_letter(&self.serialization[segment_start..]))
1324            {
1325                self.serialization.truncate(segment_start);
1326            }
1327        }
1328    }
1329
1330    pub fn parse_cannot_be_a_base_path<'i>(&mut self, mut input: Input<'i>) -> Input<'i> {
1331        loop {
1332            let input_before_c = input.clone();
1333            match input.next_utf8() {
1334                Some(('?', _)) | Some(('#', _)) if self.context == Context::UrlParser => {
1335                    return input_before_c
1336                }
1337                Some((c, utf8_c)) => {
1338                    self.check_url_code_point(c, &input);
1339                    self.serialization
1340                        .extend(utf8_percent_encode(utf8_c, CONTROLS));
1341                }
1342                None => return input,
1343            }
1344        }
1345    }
1346
1347    #[allow(clippy::too_many_arguments)]
1348    fn with_query_and_fragment(
1349        mut self,
1350        scheme_type: SchemeType,
1351        scheme_end: u32,
1352        username_end: u32,
1353        host_start: u32,
1354        host_end: u32,
1355        host: HostInternal,
1356        port: Option<u16>,
1357        path_start: u32,
1358        remaining: Input<'_>,
1359    ) -> ParseResult<Url> {
1360        let (query_start, fragment_start) =
1361            self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
1362        Ok(Url {
1363            serialization: self.serialization,
1364            scheme_end,
1365            username_end,
1366            host_start,
1367            host_end,
1368            host,
1369            port,
1370            path_start,
1371            query_start,
1372            fragment_start,
1373        })
1374    }
1375
1376    /// Return (query_start, fragment_start)
1377    fn parse_query_and_fragment(
1378        &mut self,
1379        scheme_type: SchemeType,
1380        scheme_end: u32,
1381        mut input: Input<'_>,
1382    ) -> ParseResult<(Option<u32>, Option<u32>)> {
1383        let mut query_start = None;
1384        match input.next() {
1385            Some('#') => {}
1386            Some('?') => {
1387                query_start = Some(to_u32(self.serialization.len())?);
1388                self.serialization.push('?');
1389                let remaining = self.parse_query(scheme_type, scheme_end, input);
1390                if let Some(remaining) = remaining {
1391                    input = remaining
1392                } else {
1393                    return Ok((query_start, None));
1394                }
1395            }
1396            None => return Ok((None, None)),
1397            _ => panic!("Programming error. parse_query_and_fragment() called without ? or #"),
1398        }
1399
1400        let fragment_start = to_u32(self.serialization.len())?;
1401        self.serialization.push('#');
1402        self.parse_fragment(input);
1403        Ok((query_start, Some(fragment_start)))
1404    }
1405
1406    pub fn parse_query<'i>(
1407        &mut self,
1408        scheme_type: SchemeType,
1409        scheme_end: u32,
1410        mut input: Input<'i>,
1411    ) -> Option<Input<'i>> {
1412        let len = input.chars.as_str().len();
1413        let mut query = String::with_capacity(len); // FIXME: use a streaming decoder instead
1414        let mut remaining = None;
1415        while let Some(c) = input.next() {
1416            if c == '#' && self.context == Context::UrlParser {
1417                remaining = Some(input);
1418                break;
1419            } else {
1420                self.check_url_code_point(c, &input);
1421                query.push(c);
1422            }
1423        }
1424
1425        let encoding = match &self.serialization[..scheme_end as usize] {
1426            "http" | "https" | "file" | "ftp" => self.query_encoding_override,
1427            _ => None,
1428        };
1429        let query_bytes = if let Some(o) = encoding {
1430            o(&query)
1431        } else {
1432            query.as_bytes().into()
1433        };
1434        let set = if scheme_type.is_special() {
1435            SPECIAL_QUERY
1436        } else {
1437            QUERY
1438        };
1439        self.serialization.extend(percent_encode(&query_bytes, set));
1440        remaining
1441    }
1442
1443    fn fragment_only(mut self, base_url: &Url, mut input: Input<'_>) -> ParseResult<Url> {
1444        let before_fragment = match base_url.fragment_start {
1445            Some(i) => base_url.slice(..i),
1446            None => &*base_url.serialization,
1447        };
1448        debug_assert!(self.serialization.is_empty());
1449        self.serialization
1450            .reserve(before_fragment.len() + input.chars.as_str().len());
1451        self.serialization.push_str(before_fragment);
1452        self.serialization.push('#');
1453        let next = input.next();
1454        debug_assert!(next == Some('#'));
1455        self.parse_fragment(input);
1456        Ok(Url {
1457            serialization: self.serialization,
1458            fragment_start: Some(to_u32(before_fragment.len())?),
1459            ..*base_url
1460        })
1461    }
1462
1463    pub fn parse_fragment(&mut self, mut input: Input<'_>) {
1464        while let Some((c, utf8_c)) = input.next_utf8() {
1465            if c == '\0' {
1466                self.log_violation(SyntaxViolation::NullInFragment)
1467            } else {
1468                self.check_url_code_point(c, &input);
1469            }
1470            self.serialization
1471                .extend(utf8_percent_encode(utf8_c, FRAGMENT));
1472        }
1473    }
1474
1475    fn check_url_code_point(&self, c: char, input: &Input<'_>) {
1476        if let Some(vfn) = self.violation_fn {
1477            if c == '%' {
1478                let mut input = input.clone();
1479                if !matches!((input.next(), input.next()), (Some(a), Some(b))
1480                             if is_ascii_hex_digit(a) && is_ascii_hex_digit(b))
1481                {
1482                    vfn(SyntaxViolation::PercentDecode)
1483                }
1484            } else if !is_url_code_point(c) {
1485                vfn(SyntaxViolation::NonUrlCodePoint)
1486            }
1487        }
1488    }
1489}
1490
/// Whether `c` is an ASCII hexadecimal digit: `0`-`9`, `a`-`f` or `A`-`F`.
#[inline]
fn is_ascii_hex_digit(c: char) -> bool {
    c.is_ascii_hexdigit()
}
1495
/// https://url.spec.whatwg.org/#url-code-points
///
/// Whether `c` is a URL code point: an ASCII alphanumeric, one of
/// `! $ & ' ( ) * + , - . / : ; = ? @ _ ~`, or a code point in the range
/// U+00A0 to U+10FFFD that is not a noncharacter.
// Non URL code points:
// U+0000 to U+0020 (space)
// " # % < > [ \ ] ^ ` { | }
// U+007F to U+009F
// surrogates
// U+FDD0 to U+FDEF
// Last two of each plane: U+__FFFE to U+__FFFF for __ in 00 to 10 hex
#[inline]
fn is_url_code_point(c: char) -> bool {
    // Every supplementary plane is listed in full except for its last two
    // code points. Plane 14 starts at U+E0000 like all the others: the only
    // code points excluded from it are U+EFFFE and U+EFFFF.
    matches!(c,
        'a'..='z' |
        'A'..='Z' |
        '0'..='9' |
        '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | '-' |
        '.' | '/' | ':' | ';' | '=' | '?' | '@' | '_' | '~' |
        '\u{A0}'..='\u{D7FF}' | '\u{E000}'..='\u{FDCF}' | '\u{FDF0}'..='\u{FFFD}' |
        '\u{10000}'..='\u{1FFFD}' | '\u{20000}'..='\u{2FFFD}' |
        '\u{30000}'..='\u{3FFFD}' | '\u{40000}'..='\u{4FFFD}' |
        '\u{50000}'..='\u{5FFFD}' | '\u{60000}'..='\u{6FFFD}' |
        '\u{70000}'..='\u{7FFFD}' | '\u{80000}'..='\u{8FFFD}' |
        '\u{90000}'..='\u{9FFFD}' | '\u{A0000}'..='\u{AFFFD}' |
        '\u{B0000}'..='\u{BFFFD}' | '\u{C0000}'..='\u{CFFFD}' |
        '\u{D0000}'..='\u{DFFFD}' | '\u{E0000}'..='\u{EFFFD}' |
        '\u{F0000}'..='\u{FFFFD}' | '\u{100000}'..='\u{10FFFD}')
}
1521
/// https://url.spec.whatwg.org/#c0-controls-and-space
///
/// Whether `ch` is a C0 control or a space, i.e. lies in U+0000..=U+0020.
#[inline]
fn c0_control_or_space(ch: char) -> bool {
    matches!(ch, '\u{0}'..='\u{20}')
}
1527
/// https://infra.spec.whatwg.org/#ascii-tab-or-newline
///
/// Whether `ch` is a tab (U+0009), a line feed (U+000A) or a carriage
/// return (U+000D).
#[inline]
fn ascii_tab_or_new_line(ch: char) -> bool {
    ch == '\t' || ch == '\n' || ch == '\r'
}
1533
/// https://url.spec.whatwg.org/#ascii-alpha
///
/// Whether `ch` is an ASCII letter, `a`-`z` or `A`-`Z`.
#[inline]
pub fn ascii_alpha(ch: char) -> bool {
    ch.is_ascii_alphabetic()
}
1539
1540#[inline]
1541pub fn to_u32(i: usize) -> ParseResult<u32> {
1542    if i <= ::std::u32::MAX as usize {
1543        Ok(i as u32)
1544    } else {
1545        Err(ParseError::Overflow)
1546    }
1547}
1548
/// Whether `segment` is a normalized Windows drive letter: exactly an ASCII
/// letter followed by `:` (the `|` spelling is not normalized).
fn is_normalized_windows_drive_letter(segment: &str) -> bool {
    match segment.as_bytes() {
        [letter, b':'] => letter.is_ascii_alphabetic(),
        _ => false,
    }
}
1552
/// Whether `segment` is a Windows drive letter: exactly two bytes, an ASCII
/// letter followed by `:` or `|`.
///
/// Only the segment itself is examined; checking the scheme is left to the
/// caller.
#[inline]
pub fn is_windows_drive_letter(segment: &str) -> bool {
    match segment.as_bytes() {
        [letter, separator] => {
            letter.is_ascii_alphabetic() && matches!(*separator, b':' | b'|')
        }
        _ => false,
    }
}
1559
1560/// Whether path starts with a root slash
1561/// and a windows drive letter eg: "/c:" or "/a:/"
1562fn path_starts_with_windows_drive_letter(s: &str) -> bool {
1563    if let Some(c) = s.as_bytes().first() {
1564        matches!(c, b'/' | b'\\' | b'?' | b'#') && starts_with_windows_drive_letter(&s[1..])
1565    } else {
1566        false
1567    }
1568}
1569
/// Whether `s` starts with a Windows drive letter: an ASCII letter, then `:`
/// or `|`, then either the end of the string or one of `/`, `\`, `?`, `#`.
fn starts_with_windows_drive_letter(s: &str) -> bool {
    let bytes = s.as_bytes();
    let (letter, separator) = match (bytes.get(0), bytes.get(1)) {
        (Some(&letter), Some(&separator)) => (letter, separator),
        _ => return false,
    };
    // With at least two bytes present, a missing third byte means the
    // string is exactly two bytes long.
    let properly_terminated = match bytes.get(2) {
        None => true,
        Some(&next) => matches!(next, b'/' | b'\\' | b'?' | b'#'),
    };
    letter.is_ascii_alphabetic() && matches!(separator, b':' | b'|') && properly_terminated
}
1576
1577/// https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
1578fn starts_with_windows_drive_letter_segment(input: &Input<'_>) -> bool {
1579    let mut input = input.clone();
1580    match (input.next(), input.next(), input.next()) {
1581        // its first two code points are a Windows drive letter
1582        // its third code point is U+002F (/), U+005C (\), U+003F (?), or U+0023 (#).
1583        (Some(a), Some(b), Some(c))
1584            if ascii_alpha(a) && matches!(b, ':' | '|') && matches!(c, '/' | '\\' | '?' | '#') =>
1585        {
1586            true
1587        }
1588        // its first two code points are a Windows drive letter
1589        // its length is 2
1590        (Some(a), Some(b), None) if ascii_alpha(a) && matches!(b, ':' | '|') => true,
1591        _ => false,
1592    }
1593}