1#![recursion_limit = "1024"]
62
63#[macro_use]
64extern crate error_chain;
65#[cfg(feature = "remote_list")]
66extern crate native_tls;
67#[macro_use]
68extern crate lazy_static;
69extern crate regex;
70extern crate idna;
71extern crate url;
72
73pub mod errors;
74
75#[cfg(feature = "remote_list")]
76#[cfg(test)]
77mod tests;
78
79use std::fs::File;
80use std::path::Path;
81#[cfg(feature = "remote_list")]
82use std::time::Duration;
83#[cfg(feature = "remote_list")]
84use std::net::TcpStream;
85use std::io::Read;
86#[cfg(feature = "remote_list")]
87use std::io::Write;
88use std::collections::HashMap;
89use std::net::IpAddr;
90use std::str::FromStr;
91use std::fmt;
92
93pub use errors::{Result, Error};
94
95use regex::RegexSet;
96use errors::{ErrorKind, ResultExt};
97#[cfg(feature = "remote_list")]
98use native_tls::TlsConnector;
99use idna::{domain_to_unicode};
100use url::Url;
101
/// URL of the Public Suffix List on publicsuffix.org; `List::fetch` tries it first.
pub const LIST_URL: &str = "https://publicsuffix.org/list/public_suffix_list.dat";

/// Catch-all rule that `List::build` appends after all rules read from the list.
const PREVAILING_STAR_RULE: &str = "*";
106
/// One rule of the list together with the section it was read from.
#[derive(Debug, PartialEq, Eq, Hash)]
struct Suffix {
    /// Rule text as stored by `List::append` (a leading `!` is removed before storing).
    rule: String,
    /// Section of the list the rule belongs to.
    typ: Type,
}
112
113#[derive(Debug)]
114struct ListLeaf {
115 typ: Type,
116 is_exception_rule: bool,
117}
118
119impl ListLeaf {
120 fn new(typ: Type, is_exception_rule: bool) -> Self {
121 Self { typ, is_exception_rule }
122 }
123}
124
125#[derive(Debug)]
126struct ListNode {
127 children: HashMap<String, Box<ListNode>>,
128 leaf: Option<ListLeaf>,
129}
130
131impl ListNode {
132 fn new() -> Self {
133 Self {
134 children: HashMap::new(),
135 leaf: None,
136 }
137 }
138}
139
/// Stores the public suffix list.
///
/// Rules are kept twice: in a label trie used for matching, and in a flat
/// vector used to enumerate them.
#[derive(Debug)]
pub struct List {
    /// Root of the label trie; rules are inserted right-to-left (TLD first).
    root: ListNode,
    /// Every appended rule, in insertion order.
    all: Vec<Suffix>,
}
150
/// Section of the list a rule was read from.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum Type {
    /// Rule found after the `BEGIN ICANN DOMAINS` marker.
    Icann,
    /// Rule found after the `BEGIN PRIVATE DOMAINS` marker.
    Private,
}
156
/// Holds information about a particular domain.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Domain {
    /// The domain as it was parsed, with trailing dots removed.
    full: String,
    /// Section of the matching rule, or `None` when no rule matched.
    typ: Option<Type>,
    /// Public suffix portion, if a rule matched.
    suffix: Option<String>,
    /// Suffix plus one more label, if the domain has a label beyond the suffix.
    registrable: Option<String>,
}
167
/// Holds information about a particular host: either an IP address or a domain name.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Host {
    /// The host parsed as an IP address.
    Ip(IpAddr),
    /// The host parsed as a domain name.
    Domain(Domain),
}
176
/// Holds information about a particular DNS name.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct DnsName {
    /// The name in the ASCII form produced by `Domain::to_ascii`.
    name: String,
    /// Root domain the name belongs to, when one could be determined.
    domain: Option<Domain>,
}
185
186lazy_static! {
187 static ref LABEL: RegexSet = {
189 let exprs = vec![
190 r"^[[:alnum:]]+$",
192 r"^[[:alnum:]]+[[:alnum:]-]*[[:alnum:]]+$",
197 ];
198 RegexSet::new(exprs).unwrap()
199 };
200
201 static ref LOCAL: RegexSet = {
204 let global = r#"[[:alnum:]!#$%&'*+/=?^_`{|}~-]"#;
206 let non_ascii = r#"[^\x00-\x7F]"#;
208 let quoted = r#"["(),\\:;<>@\[\]. ]"#;
210 let combined = format!(r#"({}*{}*)"#, global, non_ascii);
212
213 let exprs = vec![
214 format!(r#"^{}+$"#, combined),
216 format!(r#"^({0}+[.]?{0}+)+$"#, combined),
219 format!(r#"^"({}*{}*)*"$"#, combined, quoted),
222 ];
223
224 RegexSet::new(exprs).unwrap()
225 };
226}
227
/// Converts a type into a `Url`.
pub trait IntoUrl {
    /// Consumes `self` and returns the resulting `Url`, or an error if the conversion fails.
    fn into_url(self) -> Result<Url>;
}
232
impl IntoUrl for Url {
    /// Returns the URL unchanged; this conversion never fails.
    fn into_url(self) -> Result<Url> {
        Ok(self)
    }
}
238
239impl<'a> IntoUrl for &'a str {
240 fn into_url(self) -> Result<Url> {
241 Ok(Url::parse(self)?)
242 }
243}
244
245impl<'a> IntoUrl for &'a String {
246 fn into_url(self) -> Result<Url> {
247 Ok(Url::parse(self)?)
248 }
249}
250
251impl IntoUrl for String {
252 fn into_url(self) -> Result<Url> {
253 Ok(Url::parse(&self)?)
254 }
255}
256
/// Sends a bare HTTP/1.0 `GET` for `u` and returns the text read back from the connection.
///
/// Only the `http` and `https` schemes are supported. Read and write
/// operations on the socket time out after two seconds. Nothing is stripped
/// from the response here; the caller receives it exactly as it was read.
///
/// # Errors
///
/// Fails if the URL cannot be parsed, has no host or no usable port, uses
/// another scheme, or if connecting, the TLS handshake, writing or reading fails.
#[cfg(feature = "remote_list")]
fn request<U: IntoUrl>(u: U) -> Result<String> {
    // Writes the request to `stream`, then reads until end of stream.
    fn exchange<S: Read + Write>(mut stream: S, payload: &[u8]) -> Result<String> {
        let mut response = String::new();
        stream.write_all(payload)?;
        stream.read_to_string(&mut response)?;
        Ok(response)
    }

    let url = u.into_url()?;
    let host = url
        .host_str()
        .ok_or_else(|| Error::from(ErrorKind::NoHost))?;
    let port = url
        .port_or_known_default()
        .ok_or_else(|| Error::from(ErrorKind::NoPort))?;

    let payload = format!("GET {} HTTP/1.0\r\nHost: {}\r\n\r\n", url.path(), host);
    let stream = TcpStream::connect(format!("{}:{}", host, port))?;

    let timeout = Some(Duration::from_secs(2));
    stream.set_read_timeout(timeout)?;
    stream.set_write_timeout(timeout)?;

    // The scheme is only inspected after connecting, so an unsupported
    // scheme is reported once the TCP connection has been established.
    match url.scheme() {
        "https" => {
            let connector = TlsConnector::builder().build()?;
            let tls_stream = connector.connect(host, stream)?;
            exchange(tls_stream, payload.as_bytes())
        }
        "http" => exchange(stream, payload.as_bytes()),
        _ => Err(ErrorKind::UnsupportedScheme.into()),
    }
}
294
295impl List {
296 fn append(&mut self, mut rule: &str, typ: Type) -> Result<()> {
297 let mut is_exception_rule = false;
298 if rule.starts_with("!") {
299 is_exception_rule = true;
300 rule = &rule[1..];
301 }
302
303 let mut current = &mut self.root;
304 for label in rule.rsplit('.') {
305 if label.is_empty() {
306 return Err(ErrorKind::InvalidRule(rule.into()).into());
307 }
308
309 let cur = current;
310 current = cur.children.entry(label.to_owned())
311 .or_insert(Box::new(ListNode::new()));
312 }
313
314 current.leaf = Some(ListLeaf::new(typ, is_exception_rule));
315
316 self.all.push(Suffix {rule: rule.to_owned(), typ: typ});
318
319 Ok(())
320 }
321
322 fn build(res: &str) -> Result<List> {
323 let mut typ = None;
324 let mut list = List::empty();
325 for line in res.lines() {
326 match line {
327 line if line.contains("BEGIN ICANN DOMAINS") => { typ = Some(Type::Icann); }
328 line if line.contains("BEGIN PRIVATE DOMAINS") => { typ = Some(Type::Private); }
329 line if line.starts_with("//") => { continue; }
330 line => {
331 match typ {
332 Some(typ) => {
333 let rule = match line.split_whitespace().next() {
334 Some(rule) => rule,
335 None => continue,
336 };
337 list.append(rule, typ)?;
338 }
339 None => { continue; }
340 }
341 }
342 }
343 }
344 if list.root.children.is_empty() || list.all().is_empty() {
345 return Err(ErrorKind::InvalidList.into());
346 }
347
348 list.append(PREVAILING_STAR_RULE, Type::Icann)?; Ok(list)
351 }
352
353 pub fn empty() -> List {
359 List {
360 root: ListNode::new(),
361 all: Vec::new(),
362 }
363 }
364
365 #[cfg(feature = "remote_list")]
367 pub fn from_url<U: IntoUrl>(url: U) -> Result<List> {
368 request(url).and_then(Self::from_string)
369 }
370
371 pub fn from_path<P: AsRef<Path>>(path: P) -> Result<List> {
373 File::open(path)
374 .map_err(|err| ErrorKind::Io(err).into())
375 .and_then(|mut data| {
376 let mut res = String::new();
377 data.read_to_string(&mut res)?;
378 Ok(res)
379 })
380 .and_then(Self::from_string)
381 }
382
383 pub fn from_reader<R: Read>(mut reader: R) -> Result<List> {
389 let mut res = String::new();
390 reader.read_to_string(&mut res)?;
391 Self::build(&res)
392 }
393
394 pub fn from_string(string: String) -> Result<List> {
400 Self::from_str(&string)
401 }
402
403 pub fn from_str(string: &str) -> Result<List> {
409 Self::build(string)
410 }
411
412 #[cfg(feature = "remote_list")]
414 pub fn fetch() -> Result<List> {
415 let github = "https://raw.githubusercontent.com/publicsuffix/list/master/public_suffix_list.dat";
416
417 Self::from_url(LIST_URL)
418 .or_else(|_| Self::from_url(github))
421 }
422
423 fn find_type(&self, typ: Type) -> Vec<&str> {
424 self.all_internal()
425 .filter(|s| s.typ == typ)
426 .map(|s| s.rule.as_str()).collect()
427 }
428
429 pub fn icann(&self) -> Vec<&str> {
431 self.find_type(Type::Icann)
432 }
433
434 pub fn private(&self) -> Vec<&str> {
436 self.find_type(Type::Private)
437 }
438
439 pub fn all(&self) -> Vec<&str> {
441 self.all_internal().map(|s| s.rule.as_str()).collect()
442 }
443
444 fn all_internal(&self) -> impl Iterator<Item = &Suffix> {
445 self.all.iter()
446 .filter(|s| s.rule != PREVAILING_STAR_RULE)
448 }
449
450 pub fn parse_domain(&self, domain: &str) -> Result<Domain> {
452 Domain::parse(domain, self, true)
453 }
454
455 pub fn parse_host(&self, host: &str) -> Result<Host> {
460 Host::parse(host, self)
461 }
462
463 pub fn parse_url<U: IntoUrl>(&self, url: U) -> Result<Host> {
465 let url = url.into_url()?;
466 match url.scheme() {
467 "mailto" => {
468 match url.host_str() {
469 Some(host) => self.parse_email(&format!("{}@{}", url.username(), host)),
470 None => Err(ErrorKind::InvalidEmail.into()),
471 }
472 }
473 _ => {
474 match url.host_str() {
475 Some(host) => self.parse_host(host),
476 None => Err(ErrorKind::NoHost.into()),
477 }
478 }
479 }
480 }
481
482 pub fn parse_email(&self, address: &str) -> Result<Host> {
495 let mut parts = address.rsplitn(2, "@");
496 let host = match parts.next() {
497 Some(host) => host,
498 None => { return Err(ErrorKind::InvalidEmail.into()); }
499 };
500 let local = match parts.next() {
501 Some(local) => local,
502 None => { return Err(ErrorKind::InvalidEmail.into()); }
503 };
504 if local.chars().count() > 64
505 || address.chars().count() > 254
506 || (!local.starts_with('"') && local.contains(".."))
507 || !LOCAL.is_match(local)
508 {
509 return Err(ErrorKind::InvalidEmail.into());
510 }
511 self.parse_host(host)
512 }
513
514 pub fn parse_str(&self, string: &str) -> Result<Host> {
518 if string.contains("://") {
519 self.parse_url(string)
520 } else if string.contains("@") {
521 self.parse_email(string)
522 } else {
523 self.parse_host(string)
524 }
525 }
526
527 pub fn parse_dns_name(&self, name: &str) -> Result<DnsName> {
529 let mut dns_name = DnsName {
530 name: Domain::to_ascii(name).chain_err(|| {
531 ErrorKind::InvalidDomain(name.into())
532 })?,
533 domain: None,
534 };
535 if let Ok(mut domain) = Domain::parse(name, self, false) {
536 if let Some(root) = domain.root().map(|root| root.to_string()) {
537 if Domain::has_valid_syntax(&root) {
538 domain.full = root;
539 dns_name.domain = Some(domain);
540 }
541 }
542 }
543 Ok(dns_name)
544 }
545}
546
547impl Host {
548 fn parse(mut host: &str, list: &List) -> Result<Host> {
549 if let Ok(domain) = Domain::parse(host, list, true) {
550 return Ok(Host::Domain(domain));
551 }
552 if host.starts_with("[")
553 && !host.starts_with("[[")
554 && host.ends_with("]")
555 && !host.ends_with("]]")
556 {
557 host = host
558 .trim_start_matches("[")
559 .trim_end_matches("]");
560 };
561 if let Ok(ip) = IpAddr::from_str(host) {
562 return Ok(Host::Ip(ip));
563 }
564 Err(ErrorKind::InvalidHost.into())
565 }
566
567 pub fn is_ip(&self) -> bool {
569 if let &Host::Ip(_) = self {
570 return true;
571 }
572 false
573 }
574
575 pub fn is_domain(&self) -> bool {
577 if let &Host::Domain(_) = self {
578 return true;
579 }
580 false
581 }
582}
583
584impl fmt::Display for Host {
585 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
586 match self {
587 &Host::Ip(ref ip) => write!(f, "{}", ip),
588 &Host::Domain(ref domain) => write!(f, "{}", domain),
589 }
590 }
591}
592
593impl Domain {
594 pub fn has_valid_syntax(domain: &str) -> bool {
599 if domain.starts_with('.') { return false; }
603 let domain = match Self::to_ascii(domain) {
606 Ok(domain) => { domain }
607 Err(_) => { return false; }
608 };
609 let mut labels: Vec<&str> = domain.split('.').collect();
610 if domain.ends_with(".") { labels.pop(); }
612 if labels.len() > 127 { return false; }
614 labels.reverse();
615 for (i, label) in labels.iter().enumerate() {
616 if i == 0 && label.parse::<f64>().is_ok() { return false; }
618 if !LABEL.is_match(label) { return false; }
620 }
621 true
622 }
623
624 pub fn full(&self) -> &str {
626 &self.full
627 }
628
629 fn assemble(input: &str, s_len: usize) -> String {
630 let domain = input.to_lowercase();
631
632 let d_labels: Vec<&str> = domain
633 .trim_end_matches('.')
634 .split('.').rev().collect();
635
636 (&d_labels[..s_len]).iter().rev()
637 .map(|part| *part)
638 .collect::<Vec<_>>()
639 .join(".")
640 }
641
642 fn find_match(input: &str, domain: &str, list: &List) -> Result<Domain> {
643 let mut longest_valid = None;
644
645 let mut current = &list.root;
646 let mut s_labels_len = 0;
647
648 for label in domain.rsplit('.') {
649 if let Some(child) = current.children.get(label) {
650 current = child;
651 s_labels_len += 1;
652 } else if let Some(child) = current.children.get("*") {
653 current = child;
655 s_labels_len += 1;
656 } else {
657 break;
659 }
660
661 if let Some(list_leaf) = ¤t.leaf {
662 longest_valid = Some((list_leaf, s_labels_len));
663 }
664 }
665
666 match longest_valid {
667 Some((leaf, suffix_len)) => {
668 let typ = Some(leaf.typ);
669
670 let suffix_len = if leaf.is_exception_rule {
671 suffix_len - 1
672 } else {
673 suffix_len
674 };
675
676 let suffix = Some(Self::assemble(input, suffix_len));
677 let d_labels_len = domain.match_indices(".").count() + 1;
678
679 let registrable = if d_labels_len > suffix_len {
680 Some(Self::assemble(input, suffix_len + 1))
681 } else {
682 None
683 };
684
685 Ok(Domain {
686 full: input.to_owned(),
687 typ: typ,
688 suffix: suffix,
689 registrable: registrable,
690 })
691 },
692 None => {
693 Ok(Domain {
694 full: input.to_owned(),
695 typ: None,
696 suffix: None,
697 registrable: None,
698 })
699 },
700 }
701 }
702
703 fn to_ascii(domain: &str) -> Result<String> {
704 let result = idna::Config::default()
705 .transitional_processing(true)
706 .verify_dns_length(true)
707 .to_ascii(domain);
708 result.map_err(|error| ErrorKind::Uts46(error).into())
709 }
710
711 fn parse(domain: &str, list: &List, check_syntax: bool) -> Result<Domain> {
712 if check_syntax && !Self::has_valid_syntax(domain) {
713 return Err(ErrorKind::InvalidDomain(domain.into()).into());
714 }
715 let input = domain.trim_end_matches('.');
716 let (domain, res) = domain_to_unicode(input);
717 if let Err(errors) = res {
718 return Err(ErrorKind::Uts46(errors).into());
719 }
720 Self::find_match(input, &domain, list)
721 }
722
723 pub fn root(&self) -> Option<&str> {
725 match self.registrable {
726 Some(ref registrable) => Some(registrable),
727 None => None,
728 }
729 }
730
731 pub fn suffix(&self) -> Option<&str> {
733 match self.suffix {
734 Some(ref suffix) => Some(suffix),
735 None => None,
736 }
737 }
738
739 pub fn is_private(&self) -> bool {
741 match self.typ {
742 Some(typ) => match typ {
743 Type::Icann => false,
744 Type::Private => true,
745 },
746 None => false,
747 }
748 }
749
750 pub fn is_icann(&self) -> bool {
752 match self.typ {
753 Some(typ) => match typ {
754 Type::Icann => true,
755 Type::Private => false,
756 },
757 None => false,
758 }
759 }
760
761 pub fn has_known_suffix(&self) -> bool {
771 self.typ.is_some()
772 }
773}
774
775impl fmt::Display for Domain {
776 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
777 write!(f, "{}", self.full.trim_end_matches(".").to_lowercase())
778 }
779}
780
781impl DnsName {
782 pub fn domain(&self) -> Option<&Domain> {
784 self.domain.as_ref()
785 }
786}
787
788impl fmt::Display for DnsName {
789 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
790 self.name.fmt(f)
791 }
792}