1use self::Mapping::*;
13use crate::punycode;
14use std::{error::Error as StdError, fmt};
15use unicode_bidi::{bidi_class, BidiClass};
16use unicode_normalization::char::is_combining_mark;
17use unicode_normalization::{is_nfc, UnicodeNormalization};
18
19include!("uts46_mapping_table.rs");
20
/// Prefix identifying a Punycode-encoded label: labels starting with it are decoded during
/// processing, and it is prepended to labels that get Punycode-encoded for ASCII output.
const PUNYCODE_PREFIX: &str = "xn--";
22
/// Location of a replacement string inside `STRING_TABLE`.
///
/// The starting byte offset is stored as two separate bytes (low and high) and the length as a
/// third byte; `decode_slice` puts them back together.
#[derive(Debug)]
struct StringTableSlice {
    /// Low byte of the starting byte offset.
    byte_start_lo: u8,
    /// High byte of the starting byte offset.
    byte_start_hi: u8,
    /// Length of the string, in bytes.
    byte_len: u8,
}
31
32fn decode_slice(slice: &StringTableSlice) -> &'static str {
33 let lo = slice.byte_start_lo as usize;
34 let hi = slice.byte_start_hi as usize;
35 let start = (hi << 8) | lo;
36 let len = slice.byte_len as usize;
37 &STRING_TABLE[start..(start + len)]
38}
39
40#[repr(u8)]
41#[derive(Debug)]
42enum Mapping {
43 Valid,
44 Ignored,
45 Mapped(StringTableSlice),
46 Deviation(StringTableSlice),
47 Disallowed,
48 DisallowedStd3Valid,
49 DisallowedStd3Mapped(StringTableSlice),
50 DisallowedIdna2008,
51}
52
53fn find_char(codepoint: char) -> &'static Mapping {
54 let idx = match TABLE.binary_search_by_key(&codepoint, |&val| val.0) {
55 Ok(idx) => idx,
56 Err(idx) => idx - 1,
57 };
58
59 const SINGLE_MARKER: u16 = 1 << 15;
60
61 let (base, x) = TABLE[idx];
62 let single = (x & SINGLE_MARKER) != 0;
63 let offset = !SINGLE_MARKER & x;
64
65 if single {
66 &MAPPING_TABLE[offset as usize]
67 } else {
68 &MAPPING_TABLE[(offset + (codepoint as u16 - base as u16)) as usize]
69 }
70}
71
72struct Mapper<'a> {
73 chars: std::str::Chars<'a>,
74 config: Config,
75 errors: &'a mut Errors,
76 slice: Option<std::str::Chars<'static>>,
77}
78
79impl<'a> Iterator for Mapper<'a> {
80 type Item = char;
81
82 fn next(&mut self) -> Option<Self::Item> {
83 loop {
84 if let Some(s) = &mut self.slice {
85 match s.next() {
86 Some(c) => return Some(c),
87 None => {
88 self.slice = None;
89 }
90 }
91 }
92
93 let codepoint = self.chars.next()?;
94 if let '.' | '-' | 'a'..='z' | '0'..='9' = codepoint {
95 return Some(codepoint);
96 }
97
98 return Some(match *find_char(codepoint) {
99 Mapping::Valid => codepoint,
100 Mapping::Ignored => continue,
101 Mapping::Mapped(ref slice) => {
102 self.slice = Some(decode_slice(slice).chars());
103 continue;
104 }
105 Mapping::Deviation(ref slice) => {
106 if self.config.transitional_processing {
107 self.slice = Some(decode_slice(slice).chars());
108 continue;
109 } else {
110 codepoint
111 }
112 }
113 Mapping::Disallowed => {
114 self.errors.disallowed_character = true;
115 codepoint
116 }
117 Mapping::DisallowedStd3Valid => {
118 if self.config.use_std3_ascii_rules {
119 self.errors.disallowed_by_std3_ascii_rules = true;
120 };
121 codepoint
122 }
123 Mapping::DisallowedStd3Mapped(ref slice) => {
124 if self.config.use_std3_ascii_rules {
125 self.errors.disallowed_mapped_in_std3 = true;
126 };
127 self.slice = Some(decode_slice(slice).chars());
128 continue;
129 }
130 Mapping::DisallowedIdna2008 => {
131 if self.config.use_idna_2008_rules {
132 self.errors.disallowed_in_idna_2008 = true;
133 }
134 codepoint
135 }
136 });
137 }
138 }
139}
140
141fn passes_bidi(label: &str, is_bidi_domain: bool) -> bool {
143 if !is_bidi_domain {
146 return true;
147 }
148
149 let mut chars = label.chars();
150 let first_char_class = match chars.next() {
151 Some(c) => bidi_class(c),
152 None => return true, };
154
155 match first_char_class {
156 BidiClass::L => {
158 for c in chars.by_ref() {
160 if !matches!(
161 bidi_class(c),
162 BidiClass::L
163 | BidiClass::EN
164 | BidiClass::ES
165 | BidiClass::CS
166 | BidiClass::ET
167 | BidiClass::ON
168 | BidiClass::BN
169 | BidiClass::NSM
170 ) {
171 return false;
172 }
173 }
174
175 let mut rev_chars = label.chars().rev();
178 let mut last_non_nsm = rev_chars.next();
179 loop {
180 match last_non_nsm {
181 Some(c) if bidi_class(c) == BidiClass::NSM => {
182 last_non_nsm = rev_chars.next();
183 continue;
184 }
185 _ => {
186 break;
187 }
188 }
189 }
190 match last_non_nsm {
191 Some(c) if bidi_class(c) == BidiClass::L || bidi_class(c) == BidiClass::EN => {}
192 Some(_) => {
193 return false;
194 }
195 _ => {}
196 }
197 }
198
199 BidiClass::R | BidiClass::AL => {
201 let mut found_en = false;
202 let mut found_an = false;
203
204 for c in chars {
206 let char_class = bidi_class(c);
207 if char_class == BidiClass::EN {
208 found_en = true;
209 } else if char_class == BidiClass::AN {
210 found_an = true;
211 }
212
213 if !matches!(
214 char_class,
215 BidiClass::R
216 | BidiClass::AL
217 | BidiClass::AN
218 | BidiClass::EN
219 | BidiClass::ES
220 | BidiClass::CS
221 | BidiClass::ET
222 | BidiClass::ON
223 | BidiClass::BN
224 | BidiClass::NSM
225 ) {
226 return false;
227 }
228 }
229 let mut rev_chars = label.chars().rev();
231 let mut last = rev_chars.next();
232 loop {
233 match last {
235 Some(c) if bidi_class(c) == BidiClass::NSM => {
236 last = rev_chars.next();
237 continue;
238 }
239 _ => {
240 break;
241 }
242 }
243 }
244 match last {
245 Some(c)
246 if matches!(
247 bidi_class(c),
248 BidiClass::R | BidiClass::AL | BidiClass::EN | BidiClass::AN
249 ) => {}
250 _ => {
251 return false;
252 }
253 }
254
255 if found_an && found_en {
257 return false;
258 }
259 }
260
261 _ => {
263 return false;
264 }
265 }
266
267 true
268}
269
270fn check_validity(label: &str, config: Config, errors: &mut Errors) {
276 let first_char = label.chars().next();
277 if first_char == None {
278 return;
280 }
281
282 if config.check_hyphens && (label.starts_with('-') || label.ends_with('-')) {
290 errors.check_hyphens = true;
291 return;
292 }
293
294 if is_combining_mark(first_char.unwrap()) {
300 errors.start_combining_mark = true;
301 return;
302 }
303
304 if label.chars().any(|c| match *find_char(c) {
306 Mapping::Valid | Mapping::DisallowedIdna2008 => false,
307 Mapping::Deviation(_) => config.transitional_processing,
308 Mapping::DisallowedStd3Valid => config.use_std3_ascii_rules,
309 _ => true,
310 }) {
311 errors.invalid_mapping = true;
312 }
313
314 }
320
/// Detects domains that need no processing at all.
///
/// A domain is simple when it is non-empty and each of its `.`-separated labels
/// * consists only of ASCII lowercase letters, ASCII digits and `-`,
/// * neither starts nor ends with `-`, and
/// * does not start with the Punycode prefix `xn--`.
///
/// Empty labels (as in a trailing dot) are accepted. Interior hyphens are allowed: rejecting
/// every `-` made the hyphen-position and prefix checks unreachable and pushed every hyphenated
/// ASCII domain through full processing.
fn is_simple(domain: &str) -> bool {
    if domain.is_empty() {
        return false;
    }

    domain.split('.').all(|label| {
        let bytes = label.as_bytes();
        bytes.first() != Some(&b'-')
            && bytes.last() != Some(&b'-')
            // Same value as `PUNYCODE_PREFIX`, spelled out so the check is self-contained.
            && !label.starts_with("xn--")
            // Any byte of a non-ASCII character fails this test as well.
            && bytes
                .iter()
                .all(|b| b.is_ascii_lowercase() || b.is_ascii_digit() || *b == b'-')
    })
}
355
356fn processing(
358 domain: &str,
359 config: Config,
360 normalized: &mut String,
361 output: &mut String,
362) -> Errors {
363 normalized.clear();
364 let mut errors = Errors::default();
365 let offset = output.len();
366
367 let iter = Mapper {
368 chars: domain.chars(),
369 config,
370 errors: &mut errors,
371 slice: None,
372 };
373
374 normalized.extend(iter.nfc());
375
376 let mut decoder = punycode::Decoder::default();
377 let non_transitional = config.transitional_processing(false);
378 let (mut first, mut has_bidi_labels) = (true, false);
379 for label in normalized.split('.') {
380 if !first {
381 output.push('.');
382 }
383 first = false;
384 if let Some(remainder) = label.strip_prefix(PUNYCODE_PREFIX) {
385 match decoder.decode(remainder) {
386 Ok(decode) => {
387 let start = output.len();
388 output.extend(decode);
389 let decoded_label = &output[start..];
390
391 if !has_bidi_labels {
392 has_bidi_labels |= is_bidi_domain(decoded_label);
393 }
394
395 if !errors.is_err() {
396 if !is_nfc(decoded_label) {
397 errors.nfc = true;
398 } else {
399 check_validity(decoded_label, non_transitional, &mut errors);
400 }
401 }
402 }
403 Err(()) => {
404 has_bidi_labels = true;
405 errors.punycode = true;
406 }
407 }
408 } else {
409 if !has_bidi_labels {
410 has_bidi_labels |= is_bidi_domain(label);
411 }
412
413 check_validity(label, config, &mut errors);
415 output.push_str(label)
416 }
417 }
418
419 for label in output[offset..].split('.') {
420 if !passes_bidi(label, has_bidi_labels) {
424 errors.check_bidi = true;
425 break;
426 }
427 }
428
429 errors
430}
431
432#[derive(Default)]
433pub struct Idna {
434 config: Config,
435 normalized: String,
436 output: String,
437}
438
439impl Idna {
440 pub fn new(config: Config) -> Self {
441 Self {
442 config,
443 normalized: String::new(),
444 output: String::new(),
445 }
446 }
447
448 pub fn to_ascii_inner(&mut self, domain: &str, out: &mut String) -> Errors {
449 if is_simple(domain) {
450 out.push_str(domain);
451 return Errors::default();
452 }
453 let mut errors = processing(domain, self.config, &mut self.normalized, out);
454 self.output = std::mem::replace(out, String::with_capacity(out.len()));
455 let mut first = true;
456 for label in self.output.split('.') {
457 if !first {
458 out.push('.');
459 }
460 first = false;
461
462 if label.is_ascii() {
463 out.push_str(label);
464 } else {
465 let offset = out.len();
466 out.push_str(PUNYCODE_PREFIX);
467 if let Err(()) = punycode::encode_into(label.chars(), out) {
468 errors.punycode = true;
469 out.truncate(offset);
470 }
471 }
472 }
473 errors
474 }
475
476 #[allow(clippy::wrong_self_convention)]
478 pub fn to_ascii<'a>(&'a mut self, domain: &str, out: &mut String) -> Result<(), Errors> {
479 let mut errors = self.to_ascii_inner(domain, out);
480
481 if self.config.verify_dns_length {
482 let domain = if out.ends_with('.') {
483 &out[..out.len() - 1]
484 } else {
485 &*out
486 };
487 if domain.is_empty() || domain.split('.').any(|label| label.is_empty()) {
488 errors.too_short_for_dns = true;
489 }
490 if domain.len() > 253 || domain.split('.').any(|label| label.len() > 63) {
491 errors.too_long_for_dns = true;
492 }
493 }
494
495 errors.into()
496 }
497
498 #[allow(clippy::wrong_self_convention)]
500 pub fn to_unicode<'a>(&'a mut self, domain: &str, out: &mut String) -> Result<(), Errors> {
501 if is_simple(domain) {
502 out.push_str(domain);
503 return Errors::default().into();
504 }
505 processing(domain, self.config, &mut self.normalized, out).into()
506 }
507}
508
/// Options controlling mapping, validation and length verification.
#[derive(Clone, Copy)]
pub struct Config {
    /// Report characters whose status is disallowed under STD3 ASCII rules.
    use_std3_ascii_rules: bool,
    /// Apply the replacement of deviation characters instead of keeping them.
    transitional_processing: bool,
    /// Make `Idna::to_ascii` verify the lengths of the result and of its labels.
    verify_dns_length: bool,
    /// Report labels that start or end with `-`.
    check_hyphens: bool,
    /// Report characters whose status is disallowed under IDNA 2008.
    use_idna_2008_rules: bool,
}
517
518impl Default for Config {
520 fn default() -> Self {
521 Config {
522 use_std3_ascii_rules: false,
523 transitional_processing: false,
524 check_hyphens: false,
525 verify_dns_length: false,
530 use_idna_2008_rules: false,
531 }
532 }
533}
534
535impl Config {
536 #[inline]
537 pub fn use_std3_ascii_rules(mut self, value: bool) -> Self {
538 self.use_std3_ascii_rules = value;
539 self
540 }
541
542 #[inline]
543 pub fn transitional_processing(mut self, value: bool) -> Self {
544 self.transitional_processing = value;
545 self
546 }
547
548 #[inline]
549 pub fn verify_dns_length(mut self, value: bool) -> Self {
550 self.verify_dns_length = value;
551 self
552 }
553
554 #[inline]
555 pub fn check_hyphens(mut self, value: bool) -> Self {
556 self.check_hyphens = value;
557 self
558 }
559
560 #[inline]
561 pub fn use_idna_2008_rules(mut self, value: bool) -> Self {
562 self.use_idna_2008_rules = value;
563 self
564 }
565
566 pub fn to_ascii(self, domain: &str) -> Result<String, Errors> {
568 let mut result = String::with_capacity(domain.len());
569 let mut codec = Idna::new(self);
570 codec.to_ascii(domain, &mut result).map(|()| result)
571 }
572
573 pub fn to_unicode(self, domain: &str) -> (String, Result<(), Errors>) {
575 let mut codec = Idna::new(self);
576 let mut out = String::with_capacity(domain.len());
577 let result = codec.to_unicode(domain, &mut out);
578 (out, result)
579 }
580}
581
582fn is_bidi_domain(s: &str) -> bool {
583 for c in s.chars() {
584 if c.is_ascii_graphic() {
585 continue;
586 }
587 match bidi_class(c) {
588 BidiClass::R | BidiClass::AL | BidiClass::AN => return true,
589 _ => {}
590 }
591 }
592 false
593}
594
/// Set of error flags raised while converting a domain; all flags are `false` on success.
#[derive(Default)]
pub struct Errors {
    /// Punycode decoding or encoding of a label failed.
    punycode: bool,
    /// A label starts or ends with `-` while hyphen checking is enabled.
    check_hyphens: bool,
    /// A label failed the bidi check.
    check_bidi: bool,
    /// A label starts with a combining mark.
    start_combining_mark: bool,
    /// A label contains a character whose mapping status is not acceptable.
    invalid_mapping: bool,
    /// A Punycode-decoded label is not in NFC.
    nfc: bool,
    /// A character that stays in the output is disallowed under STD3 ASCII rules.
    disallowed_by_std3_ascii_rules: bool,
    /// A character that gets mapped is disallowed under STD3 ASCII rules.
    disallowed_mapped_in_std3: bool,
    /// The input contains a disallowed character.
    disallowed_character: bool,
    /// The ASCII result, or one of its labels, exceeds the DNS length limits.
    too_long_for_dns: bool,
    /// The ASCII result, or one of its labels, is empty.
    too_short_for_dns: bool,
    /// A character is disallowed under IDNA 2008 rules.
    disallowed_in_idna_2008: bool,
}

impl Errors {
    /// Returns `true` when at least one flag is set.
    fn is_err(&self) -> bool {
        // Destructuring without `..` makes the compiler reject this function as soon as a field
        // is added but not taken into account here.
        let Errors {
            punycode,
            check_hyphens,
            check_bidi,
            start_combining_mark,
            invalid_mapping,
            nfc,
            disallowed_by_std3_ascii_rules,
            disallowed_mapped_in_std3,
            disallowed_character,
            too_long_for_dns,
            too_short_for_dns,
            disallowed_in_idna_2008,
        } = *self;

        [
            punycode,
            check_hyphens,
            check_bidi,
            start_combining_mark,
            invalid_mapping,
            nfc,
            disallowed_by_std3_ascii_rules,
            disallowed_mapped_in_std3,
            disallowed_character,
            too_long_for_dns,
            too_short_for_dns,
            disallowed_in_idna_2008,
        ]
        .iter()
        .any(|&flag| flag)
    }
}
645
646impl fmt::Debug for Errors {
647 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
648 let Errors {
649 punycode,
650 check_hyphens,
651 check_bidi,
652 start_combining_mark,
653 invalid_mapping,
654 nfc,
655 disallowed_by_std3_ascii_rules,
656 disallowed_mapped_in_std3,
657 disallowed_character,
658 too_long_for_dns,
659 too_short_for_dns,
660 disallowed_in_idna_2008,
661 } = *self;
662
663 let fields = [
664 ("punycode", punycode),
665 ("check_hyphens", check_hyphens),
666 ("check_bidi", check_bidi),
667 ("start_combining_mark", start_combining_mark),
668 ("invalid_mapping", invalid_mapping),
669 ("nfc", nfc),
670 (
671 "disallowed_by_std3_ascii_rules",
672 disallowed_by_std3_ascii_rules,
673 ),
674 ("disallowed_mapped_in_std3", disallowed_mapped_in_std3),
675 ("disallowed_character", disallowed_character),
676 ("too_long_for_dns", too_long_for_dns),
677 ("too_short_for_dns", too_short_for_dns),
678 ("disallowed_in_idna_2008", disallowed_in_idna_2008),
679 ];
680
681 let mut empty = true;
682 f.write_str("Errors { ")?;
683 for (name, val) in &fields {
684 if *val {
685 if !empty {
686 f.write_str(", ")?;
687 }
688 f.write_str(*name)?;
689 empty = false;
690 }
691 }
692
693 if !empty {
694 f.write_str(" }")
695 } else {
696 f.write_str("}")
697 }
698 }
699}
700
701impl From<Errors> for Result<(), Errors> {
702 fn from(e: Errors) -> Result<(), Errors> {
703 if !e.is_err() {
704 Ok(())
705 } else {
706 Err(e)
707 }
708 }
709}
710
711impl StdError for Errors {}
712
713impl fmt::Display for Errors {
714 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
715 fmt::Debug::fmt(self, f)
716 }
717}
718
#[cfg(test)]
mod tests {
    use super::{find_char, Mapping};

    /// Every character that `Mapper` passes through without a lookup must be `Valid` in the
    /// table, otherwise skipping the lookup would change the result.
    #[test]
    fn mapping_fast_path() {
        let fast_path_chars = "-.".chars().chain('0'..='9').chain('a'..='z');
        for c in fast_path_chars {
            assert_matches!(find_char(c), &Mapping::Valid);
        }
    }
}