1mod matcher;
62
63#[cfg(feature = "remote_list")]
64#[cfg(test)]
65mod tests;
66
67use std::{collections::HashMap, fmt, fs::File, io::Read, net::IpAddr, path::Path, str::FromStr};
68#[cfg(feature = "remote_list")]
69use std::{io::Write, net::TcpStream, time::Duration};
70
71pub mod errors;
72pub use crate::errors::{Error, ErrorKind, Result};
73
74use idna::domain_to_unicode;
75#[cfg(feature = "remote_list")]
76use native_tls::TlsConnector;
77use url::Url;
78
/// Canonical location of the public suffix list; this is the first source
/// tried by `List::fetch`.
pub const LIST_URL: &str = "https://publicsuffix.org/list/public_suffix_list.dat";

/// Catch-all rule appended to every successfully built list. It is filtered
/// out of the rule listings exposed to callers.
const PREVAILING_STAR_RULE: &str = "*";
83
84#[derive(Debug, PartialEq, Eq, Hash)]
85struct Suffix {
86 rule: String,
87 typ: Type,
88}
89
90#[derive(Debug)]
91struct ListLeaf {
92 typ: Type,
93 is_exception_rule: bool,
94}
95
96impl ListLeaf {
97 fn new(typ: Type, is_exception_rule: bool) -> Self {
98 Self {
99 typ,
100 is_exception_rule,
101 }
102 }
103}
104
105#[derive(Debug)]
106struct ListNode {
107 children: HashMap<String, ListNode>,
108 leaf: Option<ListLeaf>,
109}
110
111impl ListNode {
112 fn new() -> Self {
113 Self {
114 children: HashMap::new(),
115 leaf: None,
116 }
117 }
118}
119
120#[derive(Debug)]
126pub struct List {
127 root: ListNode,
128 all: Vec<Suffix>, }
130
/// Section of the public suffix list a rule was read from.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum Type {
    /// Rule found after the `BEGIN ICANN DOMAINS` marker.
    Icann,
    /// Rule found after the `BEGIN PRIVATE DOMAINS` marker.
    Private,
}
136
137#[derive(Debug, Clone, PartialEq, Eq, Hash)]
141pub struct Domain {
142 full: String,
143 typ: Option<Type>,
144 suffix: Option<String>,
145 registrable: Option<String>,
146}
147
148#[derive(Debug, Clone, PartialEq, Eq, Hash)]
152pub enum Host {
153 Ip(IpAddr),
154 Domain(Domain),
155}
156
157#[derive(Debug, Clone, PartialEq, Eq, Hash)]
161pub struct DnsName {
162 name: String,
163 domain: Option<Domain>,
164}
165
166pub trait IntoUrl {
168 fn into_url(self) -> Result<Url>;
169}
170
171impl IntoUrl for Url {
172 fn into_url(self) -> Result<Url> {
173 Ok(self)
174 }
175}
176
177impl<'a> IntoUrl for &'a str {
178 fn into_url(self) -> Result<Url> {
179 Ok(Url::parse(self)?)
180 }
181}
182
183impl<'a> IntoUrl for &'a String {
184 fn into_url(self) -> Result<Url> {
185 Ok(Url::parse(self)?)
186 }
187}
188
189impl IntoUrl for String {
190 fn into_url(self) -> Result<Url> {
191 Ok(Url::parse(&self)?)
192 }
193}
194
/// Performs a minimal blocking `GET` over HTTP/1.0 and returns everything
/// read from the socket as a string.
///
/// No response parsing is done here: the returned text is the raw response
/// as received, so it starts with whatever the server sent first.
///
/// The request target is the URL path followed by the query string (if any).
/// Read and write operations time out after two seconds.
///
/// # Errors
///
/// * `NoHost` / `NoPort` when the URL lacks a host or a usable port.
/// * `UnsupportedScheme` for anything other than `http` and `https`; this is
///   reported before any connection is opened.
/// * Any I/O or TLS error raised while connecting, writing or reading.
#[cfg(feature = "remote_list")]
fn request<U: IntoUrl>(u: U) -> Result<String> {
    let url = u.into_url()?;
    let host = match url.host_str() {
        Some(host) => host,
        None => return Err(ErrorKind::NoHost.into()),
    };
    let port = match url.port_or_known_default() {
        Some(port) => port,
        None => return Err(ErrorKind::NoPort.into()),
    };

    // Decide on the transport up front so an unsupported scheme never causes
    // a pointless (and un-timed) TCP connection attempt.
    let use_tls = match url.scheme() {
        "https" => true,
        "http" => false,
        _ => return Err(ErrorKind::UnsupportedScheme.into()),
    };

    // The query string is part of the request target; dropping it would
    // fetch a different resource than the one the caller asked for.
    let target = match url.query() {
        Some(query) => format!("{}?{}", url.path(), query),
        None => url.path().to_owned(),
    };
    let data = format!("GET {} HTTP/1.0\r\nHost: {}\r\n\r\n", target, host);

    let addr = format!("{}:{}", host, port);
    let stream = TcpStream::connect(addr)?;
    let timeout = Duration::from_secs(2);
    stream.set_read_timeout(Some(timeout))?;
    stream.set_write_timeout(Some(timeout))?;

    let mut res = String::new();
    if use_tls {
        let connector = TlsConnector::builder().build()?;
        let mut stream = connector.connect(host, stream)?;
        stream.write_all(data.as_bytes())?;
        stream.read_to_string(&mut res)?;
    } else {
        let mut stream = stream;
        stream.write_all(data.as_bytes())?;
        stream.read_to_string(&mut res)?;
    }

    Ok(res)
}
238
239impl List {
240 fn append(&mut self, mut rule: &str, typ: Type) -> Result<()> {
241 let mut is_exception_rule = false;
242 if rule.starts_with('!') {
243 is_exception_rule = true;
244 rule = &rule[1..];
245 }
246
247 let mut current = &mut self.root;
248 for label in rule.rsplit('.') {
249 if label.is_empty() {
250 return Err(ErrorKind::InvalidRule(rule.into()).into());
251 }
252
253 let cur = current;
254 current = cur
255 .children
256 .entry(label.to_owned())
257 .or_insert_with(ListNode::new);
258 }
259
260 current.leaf = Some(ListLeaf::new(typ, is_exception_rule));
261
262 self.all.push(Suffix {
264 rule: rule.to_owned(),
265 typ,
266 });
267
268 Ok(())
269 }
270
271 fn build(res: &str) -> Result<List> {
272 let mut typ = None;
273 let mut list = List::empty();
274 for line in res.lines() {
275 match line {
276 line if line.contains("BEGIN ICANN DOMAINS") => {
277 typ = Some(Type::Icann);
278 }
279 line if line.contains("BEGIN PRIVATE DOMAINS") => {
280 typ = Some(Type::Private);
281 }
282 line if line.starts_with("//") => {
283 continue;
284 }
285 line => match typ {
286 Some(typ) => {
287 let rule = match line.split_whitespace().next() {
288 Some(rule) => rule,
289 None => continue,
290 };
291 list.append(rule, typ)?;
292 }
293 None => {
294 continue;
295 }
296 },
297 }
298 }
299 if list.root.children.is_empty() || list.all().is_empty() {
300 return Err(ErrorKind::InvalidList.into());
301 }
302
303 list.append(PREVAILING_STAR_RULE, Type::Icann)?; Ok(list)
306 }
307
308 pub fn from_string(string: String) -> Result<List> {
314 Self::from_str(&string)
315 }
316
317 #[allow(clippy::should_implement_trait)]
323 pub fn from_str(string: &str) -> Result<List> {
324 Self::build(string)
325 }
326
327 pub fn empty() -> List {
333 List {
334 root: ListNode::new(),
335 all: Vec::new(),
336 }
337 }
338
339 #[cfg(feature = "remote_list")]
341 pub fn from_url<U: IntoUrl>(url: U) -> Result<List> {
342 let s = request(url)?;
343 Self::from_str(&s)
344 }
345
346 pub fn from_path<P: AsRef<Path>>(path: P) -> Result<List> {
348 File::open(path)
349 .map_err(|err| ErrorKind::Io(err).into())
350 .and_then(|mut data| {
351 let mut res = String::new();
352 data.read_to_string(&mut res)?;
353 Self::from_str(&res)
354 })
355 }
356
357 pub fn from_reader<R: Read>(mut reader: R) -> Result<List> {
363 let mut res = String::new();
364 reader.read_to_string(&mut res)?;
365 Self::build(&res)
366 }
367
368 #[cfg(feature = "remote_list")]
370 pub fn fetch() -> Result<List> {
371 let github =
372 "https://raw.githubusercontent.com/publicsuffix/list/master/public_suffix_list.dat";
373
374 Self::from_url(LIST_URL)
375 .or_else(|_| Self::from_url(github))
378 }
379
380 fn find_type(&self, typ: Type) -> Vec<&str> {
381 self.all_internal()
382 .filter(|s| s.typ == typ)
383 .map(|s| s.rule.as_str())
384 .collect()
385 }
386
387 pub fn icann(&self) -> Vec<&str> {
389 self.find_type(Type::Icann)
390 }
391
392 pub fn private(&self) -> Vec<&str> {
394 self.find_type(Type::Private)
395 }
396
397 pub fn all(&self) -> Vec<&str> {
399 self.all_internal().map(|s| s.rule.as_str()).collect()
400 }
401
402 fn all_internal(&self) -> impl Iterator<Item = &Suffix> {
403 self.all
404 .iter()
405 .filter(|s| s.rule != PREVAILING_STAR_RULE)
407 }
408
409 pub fn parse_domain(&self, domain: &str) -> Result<Domain> {
411 Domain::parse(domain, self, true)
412 }
413
414 pub fn parse_host(&self, host: &str) -> Result<Host> {
419 Host::parse(host, self)
420 }
421
422 pub fn parse_url<U: IntoUrl>(&self, url: U) -> Result<Host> {
424 let url = url.into_url()?;
425 match url.scheme() {
426 "mailto" => match url.host_str() {
427 Some(host) => self.parse_email(&format!("{}@{}", url.username(), host)),
428 None => Err(ErrorKind::InvalidEmail.into()),
429 },
430 _ => match url.host_str() {
431 Some(host) => self.parse_host(host),
432 None => Err(ErrorKind::NoHost.into()),
433 },
434 }
435 }
436
437 pub fn parse_email(&self, address: &str) -> Result<Host> {
450 let mut parts = address.rsplitn(2, '@');
451 let host = match parts.next() {
452 Some(host) => host,
453 None => {
454 return Err(ErrorKind::InvalidEmail.into());
455 }
456 };
457 let local = match parts.next() {
458 Some(local) => local,
459 None => {
460 return Err(ErrorKind::InvalidEmail.into());
461 }
462 };
463 if local.chars().count() > 64
464 || address.chars().count() > 254
465 || (!local.starts_with('"') && local.contains(".."))
466 || !matcher::is_email_local(local)
467 {
468 return Err(ErrorKind::InvalidEmail.into());
469 }
470 self.parse_host(host)
471 }
472
473 pub fn parse_str(&self, string: &str) -> Result<Host> {
477 if string.contains("://") {
478 self.parse_url(string)
479 } else if string.contains('@') {
480 self.parse_email(string)
481 } else {
482 self.parse_host(string)
483 }
484 }
485
486 pub fn parse_dns_name(&self, name: &str) -> Result<DnsName> {
488 let mut dns_name = DnsName {
489 name: Domain::try_to_ascii(name).map_err(|_| ErrorKind::InvalidDomain(name.into()))?,
490 domain: None,
491 };
492 if let Ok(mut domain) = Domain::parse(name, self, false) {
493 if let Some(root) = domain.root() {
494 if Domain::has_valid_syntax(&root) {
495 domain.full = root.to_string();
496 dns_name.domain = Some(domain);
497 }
498 }
499 }
500 Ok(dns_name)
501 }
502}
503
504impl Host {
505 fn parse(mut host: &str, list: &List) -> Result<Host> {
506 if let Ok(domain) = Domain::parse(host, list, true) {
507 return Ok(Host::Domain(domain));
508 }
509 if host.starts_with('[')
510 && !host.starts_with("[[")
511 && host.ends_with(']')
512 && !host.ends_with("]]")
513 {
514 host = host.trim_start_matches('[').trim_end_matches(']');
515 };
516 if let Ok(ip) = IpAddr::from_str(host) {
517 return Ok(Host::Ip(ip));
518 }
519 Err(ErrorKind::InvalidHost.into())
520 }
521
522 pub fn is_ip(&self) -> bool {
524 if let Host::Ip(_) = self {
525 return true;
526 }
527 false
528 }
529
530 pub fn is_domain(&self) -> bool {
532 if let Host::Domain(_) = self {
533 return true;
534 }
535 false
536 }
537}
538
539impl fmt::Display for Host {
540 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
541 match self {
542 Host::Ip(ref ip) => write!(f, "{}", ip),
543 Host::Domain(ref domain) => write!(f, "{}", domain),
544 }
545 }
546}
547
548impl Domain {
549 pub fn has_valid_syntax(domain: &str) -> bool {
554 if domain.starts_with('.') {
558 return false;
559 }
560 let domain = match Self::try_to_ascii(domain) {
563 Ok(domain) => domain,
564 Err(_) => {
565 return false;
566 }
567 };
568 let mut labels: Vec<&str> = domain.split('.').collect();
569 if domain.ends_with('.') {
571 labels.pop();
572 }
573 if labels.len() > 127 {
575 return false;
576 }
577 labels.reverse();
578 for (i, label) in labels.iter().enumerate() {
579 if i == 0 && label.parse::<f64>().is_ok() {
581 return false;
582 }
583 if !matcher::is_label(label) {
585 return false;
586 }
587 }
588 true
589 }
590
591 pub fn full(&self) -> &str {
593 &self.full
594 }
595
596 fn assemble(input: &str, s_len: usize) -> String {
597 let domain = input.to_lowercase();
598
599 let d_labels: Vec<&str> = domain.trim_end_matches('.').split('.').rev().collect();
600
601 (&d_labels[..s_len])
602 .iter()
603 .rev()
604 .copied()
605 .collect::<Vec<_>>()
606 .join(".")
607 }
608
609 fn find_match(input: &str, domain: &str, list: &List) -> Domain {
610 let mut longest_valid = None;
611
612 let mut current = &list.root;
613 let mut s_labels_len = 0;
614 let mut wildcard_match = false;
615
616 for label in domain.rsplit('.') {
617 if let Some(child) = current.children.get(label) {
618 current = child;
619 s_labels_len += 1;
620 } else if let Some(child) = current.children.get("*") {
621 current = child;
623 s_labels_len += 1;
624 wildcard_match = true;
625 } else {
626 break;
628 }
629
630 if let Some(list_leaf) = ¤t.leaf {
631 longest_valid = Some((list_leaf, s_labels_len));
632 }
633 }
634
635 match longest_valid {
636 Some((leaf, suffix_len)) => {
637 let typ = if !wildcard_match {
638 Some(leaf.typ)
639 } else {
640 None
641 };
642
643 let suffix_len = if leaf.is_exception_rule {
644 suffix_len - 1
645 } else {
646 suffix_len
647 };
648
649 let suffix = Some(Self::assemble(input, suffix_len));
650 let d_labels_len = domain.match_indices('.').count() + 1;
651
652 let registrable = if d_labels_len > suffix_len {
653 Some(Self::assemble(input, suffix_len + 1))
654 } else {
655 None
656 };
657
658 Domain {
659 full: input.to_owned(),
660 typ,
661 suffix,
662 registrable,
663 }
664 }
665 None => Domain {
666 full: input.to_owned(),
667 typ: None,
668 suffix: None,
669 registrable: None,
670 },
671 }
672 }
673
674 fn try_to_ascii(domain: &str) -> Result<String> {
675 let result = idna::Config::default()
676 .transitional_processing(true)
677 .verify_dns_length(true)
678 .to_ascii(domain);
679 result.map_err(|error| ErrorKind::Uts46(error).into())
680 }
681
682 fn parse(domain: &str, list: &List, check_syntax: bool) -> Result<Domain> {
683 if check_syntax && !Self::has_valid_syntax(domain) {
684 return Err(ErrorKind::InvalidDomain(domain.into()).into());
685 }
686 let input = domain.trim_end_matches('.');
687 let (domain, res) = domain_to_unicode(input);
688 if let Err(errors) = res {
689 return Err(ErrorKind::Uts46(errors).into());
690 }
691 Ok(Self::find_match(input, &domain, list))
692 }
693
694 pub fn root(&self) -> Option<&str> {
696 self.registrable.as_ref().map(|x| &x[..])
697 }
698
699 pub fn suffix(&self) -> Option<&str> {
701 self.suffix.as_ref().map(|x| &x[..])
702 }
703
704 pub fn is_private(&self) -> bool {
706 self.typ.map(|t| t == Type::Private).unwrap_or(false)
707 }
708
709 pub fn is_icann(&self) -> bool {
711 self.typ.map(|t| t == Type::Icann).unwrap_or(false)
712 }
713
714 pub fn has_known_suffix(&self) -> bool {
724 self.typ.is_some()
725 }
726}
727
728impl fmt::Display for Domain {
729 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
730 write!(f, "{}", self.full.trim_end_matches('.').to_lowercase())
731 }
732}
733
734impl DnsName {
735 pub fn domain(&self) -> Option<&Domain> {
737 self.domain.as_ref()
738 }
739}
740
741impl fmt::Display for DnsName {
742 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
743 self.name.fmt(f)
744 }
745}