#![recursion_limit = "1024"]
#[macro_use]
extern crate error_chain;
#[cfg(feature = "remote_list")]
extern crate native_tls;
#[macro_use]
extern crate lazy_static;
extern crate regex;
extern crate idna;
extern crate url;
pub mod errors;
#[cfg(feature = "remote_list")]
#[cfg(test)]
mod tests;
use std::fs::File;
use std::path::Path;
#[cfg(feature = "remote_list")]
use std::time::Duration;
#[cfg(feature = "remote_list")]
use std::net::TcpStream;
use std::io::Read;
#[cfg(feature = "remote_list")]
use std::io::Write;
use std::collections::HashMap;
use std::net::IpAddr;
use std::str::FromStr;
use std::fmt;
pub use errors::{Result, Error};
use regex::RegexSet;
use errors::{ErrorKind, ResultExt};
#[cfg(feature = "remote_list")]
use native_tls::TlsConnector;
use idna::{domain_to_unicode};
use url::Url;
/// URL of the public suffix list; `List::fetch` tries this location first.
pub const LIST_URL: &'static str = "https://publicsuffix.org/list/public_suffix_list.dat";
// Catch-all `*` rule that `List::build` appends after parsing; `all_internal`
// filters it out again so the public accessors never report it.
const PREVAILING_STAR_RULE: &'static str = "*";
/// One rule from the list, paired with the section it was read from.
#[derive(Debug, PartialEq, Eq, Hash)]
struct Suffix {
// Rule text; `List::append` stores it with any leading `!` already removed.
rule: String,
// Section (ICANN or private) that was active when the rule was read.
typ: Type,
}
/// Marks a tree node at which a rule ends.
#[derive(Debug)]
struct ListLeaf {
    /// Section of the list the rule came from.
    typ: Type,
    /// `true` when the rule was written with a leading `!`.
    is_exception_rule: bool,
}

impl ListLeaf {
    /// Builds a leaf from the rule's type and its exception flag.
    fn new(typ: Type, is_exception_rule: bool) -> Self {
        ListLeaf {
            typ: typ,
            is_exception_rule: is_exception_rule,
        }
    }
}
/// A node of the rule tree; each edge is one domain label.
#[derive(Debug)]
struct ListNode {
    /// Child nodes keyed by the next label.
    children: HashMap<String, Box<ListNode>>,
    /// Present when a rule ends exactly at this node.
    leaf: Option<ListLeaf>,
}

impl ListNode {
    /// Creates a node with no children and no rule attached.
    fn new() -> Self {
        let children = HashMap::new();
        ListNode {
            children,
            leaf: None,
        }
    }
}
/// A parsed suffix list: a label tree used for lookups plus the flat list of rules.
#[derive(Debug)]
pub struct List {
// Tree of rules; `append` inserts labels right-to-left, so lookups walk a domain
// from its last label inwards.
root: ListNode,
// Every rule passed to `append`, in insertion order (this includes the `*` rule
// that `build` adds at the end).
all: Vec<Suffix>,
}
/// Section of the list a rule belongs to. `List::build` switches between the two
/// when it sees a line containing `BEGIN ICANN DOMAINS` or `BEGIN PRIVATE DOMAINS`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum Type {
Icann,
Private,
}
/// Result of matching a domain name against a `List`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Domain {
// The parsed input with trailing dots trimmed; `List::parse_dns_name` overwrites
// this with the registrable root.
full: String,
// Section of the matching rule, or `None` when no rule matched.
typ: Option<Type>,
// Public suffix assembled from the input, if a rule matched.
suffix: Option<String>,
// Suffix plus one more label, if the domain has more labels than the suffix.
registrable: Option<String>,
}
/// A parsed host: either an IP address or a domain name.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Host {
// Produced when the input parses as an `IpAddr` (optionally wrapped in `[...]`).
Ip(IpAddr),
// Produced when the input parses as a syntactically valid domain.
Domain(Domain),
}
/// A DNS name as produced by `List::parse_dns_name`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct DnsName {
// Result of `Domain::to_ascii` on the original name.
name: String,
// Set only when the name has a registrable root that passes
// `Domain::has_valid_syntax`.
domain: Option<Domain>,
}
lazy_static! {
    // Accepts a single domain label: alphanumerics only, or alphanumerics at both
    // ends with hyphens allowed in between.
    static ref LABEL: RegexSet = {
        let only_alnum = r"^[[:alnum:]]+$";
        let hyphenated = r"^[[:alnum:]]+[[:alnum:]-]*[[:alnum:]]+$";
        RegexSet::new(vec![only_alnum, hyphenated]).unwrap()
    };
    // Accepts the local part (left of the last `@`) of an email address in one of
    // three shapes: a plain run of atoms, dot-separated atoms, or a quoted string.
    static ref LOCAL: RegexSet = {
        let global = r#"[[:alnum:]!#$%&'*+/=?^_`{|}~-]"#;
        let non_ascii = r#"[^\x00-\x7F]"#;
        let quoted = r#"["(),\\:;<>@\[\]. ]"#;
        // An atom: allowed ASCII characters followed by non-ASCII characters.
        let combined = format!(r#"({}*{}*)"#, global, non_ascii);
        let plain = format!(r#"^{}+$"#, combined);
        let dotted = format!(r#"^({0}+[.]?{0}+)+$"#, combined);
        let in_quotes = format!(r#"^"({}*{}*)*"$"#, combined, quoted);
        RegexSet::new(vec![plain, dotted, in_quotes]).unwrap()
    };
}
/// Conversion into a `Url`, so that URL-taking functions can accept either an
/// already parsed `Url` or a string.
pub trait IntoUrl {
// Consumes `self` and returns the URL, or an error if it cannot be produced.
fn into_url(self) -> Result<Url>;
}
/// A `Url` is already parsed, so it is handed back unchanged.
impl IntoUrl for Url {
    fn into_url(self) -> Result<Url> {
        let url = self;
        Ok(url)
    }
}
/// Parses a string slice with `Url::parse`.
impl<'a> IntoUrl for &'a str {
    fn into_url(self) -> Result<Url> {
        let parsed = Url::parse(self)?;
        Ok(parsed)
    }
}
/// Parses a borrowed `String` with `Url::parse`.
impl<'a> IntoUrl for &'a String {
    fn into_url(self) -> Result<Url> {
        let parsed = Url::parse(self)?;
        Ok(parsed)
    }
}
/// Parses an owned `String` with `Url::parse`.
impl IntoUrl for String {
    fn into_url(self) -> Result<Url> {
        let parsed = Url::parse(&self)?;
        Ok(parsed)
    }
}
/// Performs a blocking HTTP/1.0 `GET` for the path of `u` and returns everything
/// read from the connection as one string (nothing is stripped from it here).
///
/// Reads and writes on the socket time out after two seconds.
///
/// # Errors
/// Fails with `NoHost` / `NoPort` when the URL lacks them, with
/// `UnsupportedScheme` for anything other than `http` and `https`, and with the
/// converted error when connecting, the TLS handshake, or the I/O itself fails.
#[cfg(feature = "remote_list")]
fn request<U: IntoUrl>(u: U) -> Result<String> {
    // Writes the request and reads the whole response over any stream type.
    fn exchange<S: Read + Write>(mut conn: S, payload: &[u8], out: &mut String) -> Result<()> {
        conn.write_all(payload)?;
        conn.read_to_string(out)?;
        Ok(())
    }

    let url = u.into_url()?;
    let host = url.host_str().ok_or(ErrorKind::NoHost)?;
    let port = url.port_or_known_default().ok_or(ErrorKind::NoPort)?;
    let payload = format!("GET {} HTTP/1.0\r\nHost: {}\r\n\r\n", url.path(), host);

    // The TCP connection is opened before the scheme is inspected.
    let tcp = TcpStream::connect(format!("{}:{}", host, port))?;
    let limit = Some(Duration::from_secs(2));
    tcp.set_read_timeout(limit)?;
    tcp.set_write_timeout(limit)?;

    let mut body = String::new();
    match url.scheme() {
        "https" => {
            let connector = TlsConnector::builder().build()?;
            let tls = connector.connect(host, tcp)?;
            exchange(tls, payload.as_bytes(), &mut body)?;
        }
        "http" => exchange(tcp, payload.as_bytes(), &mut body)?,
        _ => return Err(ErrorKind::UnsupportedScheme.into()),
    }
    Ok(body)
}
impl List {
/// Inserts one rule into the lookup tree and records it in `all`.
///
/// A leading `!` marks the rule as an exception rule and is removed before the
/// rule is stored. Labels are inserted from right to left, so the last label of
/// the rule sits directly under the root.
///
/// # Errors
/// Returns `ErrorKind::InvalidRule` when any label of the rule is empty. Nodes
/// created for labels before the empty one are left in the tree.
fn append(&mut self, mut rule: &str, typ: Type) -> Result<()> {
    let is_exception_rule = rule.starts_with('!');
    if is_exception_rule {
        rule = &rule[1..];
    }
    let mut current = &mut self.root;
    for label in rule.rsplit('.') {
        if label.is_empty() {
            return Err(ErrorKind::InvalidRule(rule.into()).into());
        }
        // Move the borrow into a temporary so `current` can be reassigned.
        let cur = current;
        // Build the node lazily: most labels already exist in the tree.
        current = cur.children.entry(label.to_owned())
            .or_insert_with(|| Box::new(ListNode::new()));
    }
    current.leaf = Some(ListLeaf::new(typ, is_exception_rule));
    self.all.push(Suffix { rule: rule.to_owned(), typ });
    Ok(())
}
fn build(res: &str) -> Result<List> {
let mut typ = None;
let mut list = List::empty();
for line in res.lines() {
match line {
line if line.contains("BEGIN ICANN DOMAINS") => { typ = Some(Type::Icann); }
line if line.contains("BEGIN PRIVATE DOMAINS") => { typ = Some(Type::Private); }
line if line.starts_with("//") => { continue; }
line => {
match typ {
Some(typ) => {
let rule = match line.split_whitespace().next() {
Some(rule) => rule,
None => continue,
};
list.append(rule, typ)?;
}
None => { continue; }
}
}
}
}
if list.root.children.is_empty() || list.all().is_empty() {
return Err(ErrorKind::InvalidList.into());
}
list.append(PREVAILING_STAR_RULE, Type::Icann)?;
Ok(list)
}
/// Creates a list that contains no rules at all.
pub fn empty() -> List {
    let root = ListNode::new();
    let all = Vec::new();
    List { root, all }
}
/// Downloads the list from `url` and parses the response.
#[cfg(feature = "remote_list")]
pub fn from_url<U: IntoUrl>(url: U) -> Result<List> {
    let body = request(url)?;
    Self::from_string(body)
}
/// Reads the file at `path` into memory and parses it as a suffix list.
///
/// # Errors
/// Returns an I/O error when the file cannot be opened or read, and whatever
/// `from_string` returns for invalid content.
pub fn from_path<P: AsRef<Path>>(path: P) -> Result<List> {
    let mut file = match File::open(path) {
        Ok(file) => file,
        Err(err) => return Err(ErrorKind::Io(err).into()),
    };
    let mut contents = String::new();
    file.read_to_string(&mut contents)?;
    Self::from_string(contents)
}
/// Reads `reader` to its end and parses the text as a suffix list.
pub fn from_reader<R: Read>(mut reader: R) -> Result<List> {
    let mut text = String::new();
    match reader.read_to_string(&mut text) {
        Ok(_) => Self::build(&text),
        Err(err) => Err(err.into()),
    }
}
/// Parses an owned string holding the text of a suffix list.
pub fn from_string(string: String) -> Result<List> {
    let text: &str = string.as_str();
    Self::from_str(text)
}
pub fn from_str(string: &str) -> Result<List> {
Self::build(string)
}
/// Downloads the list from `LIST_URL`; if that fails for any reason, retries
/// once against the copy hosted on GitHub.
#[cfg(feature = "remote_list")]
pub fn fetch() -> Result<List> {
    match Self::from_url(LIST_URL) {
        Ok(list) => Ok(list),
        // The first error is discarded; only the fallback's outcome is reported.
        Err(_) => Self::from_url(
            "https://raw.githubusercontent.com/publicsuffix/list/master/public_suffix_list.dat",
        ),
    }
}
/// Collects the text of every rule (minus the internal `*` rule) of section `typ`.
fn find_type(&self, typ: Type) -> Vec<&str> {
    let mut rules = Vec::new();
    for suffix in self.all_internal() {
        if suffix.typ == typ {
            rules.push(suffix.rule.as_str());
        }
    }
    rules
}
/// Returns the rules that were read from the ICANN section.
pub fn icann(&self) -> Vec<&str> {
    let wanted = Type::Icann;
    self.find_type(wanted)
}
/// Returns the rules that were read from the private section.
pub fn private(&self) -> Vec<&str> {
    let wanted = Type::Private;
    self.find_type(wanted)
}
/// Returns every rule of the list, without the internal `*` rule.
pub fn all(&self) -> Vec<&str> {
    let mut rules = Vec::new();
    rules.extend(self.all_internal().map(|suffix| &suffix.rule[..]));
    rules
}
/// Iterates over the stored rules, skipping any whose text is exactly the
/// catch-all `*` rule.
fn all_internal(&self) -> impl Iterator<Item = &Suffix> {
    let stored = self.all.iter();
    stored.filter(|suffix| suffix.rule.as_str() != PREVAILING_STAR_RULE)
}
/// Parses `domain` against this list, rejecting names with invalid syntax.
pub fn parse_domain(&self, domain: &str) -> Result<Domain> {
    let check_syntax = true;
    Domain::parse(domain, self, check_syntax)
}
/// Parses `host` as either a domain name or an IP address.
pub fn parse_host(&self, host: &str) -> Result<Host> {
    let list = self;
    Host::parse(host, list)
}
/// Extracts and parses the host of a URL.
///
/// For the `mailto` scheme the username and host of the URL are joined with `@`
/// and validated through `parse_email`; every other scheme goes through
/// `parse_host`.
///
/// # Errors
/// A URL without a host yields `InvalidEmail` for `mailto` and `NoHost` otherwise.
pub fn parse_url<U: IntoUrl>(&self, url: U) -> Result<Host> {
    let url = url.into_url()?;
    let is_mailto = url.scheme() == "mailto";
    match (url.host_str(), is_mailto) {
        (Some(host), true) => {
            let address = format!("{}@{}", url.username(), host);
            self.parse_email(&address)
        }
        (Some(host), false) => self.parse_host(host),
        (None, true) => Err(ErrorKind::InvalidEmail.into()),
        (None, false) => Err(ErrorKind::NoHost.into()),
    }
}
/// Validates an email address and parses the host after its last `@`.
///
/// # Errors
/// Returns `InvalidEmail` when there is no `@`, when the local part is longer
/// than 64 characters, when the whole address is longer than 254 characters,
/// when an unquoted local part contains `..`, or when the local part does not
/// match any of the `LOCAL` patterns. Host errors come from `parse_host`.
pub fn parse_email(&self, address: &str) -> Result<Host> {
    // Split at the last `@`: everything before it is the local part.
    let at = match address.rfind('@') {
        Some(at) => at,
        None => return Err(ErrorKind::InvalidEmail.into()),
    };
    let local = &address[..at];
    let host = &address[at + 1..];

    let too_long = local.chars().count() > 64 || address.chars().count() > 254;
    let double_dot = !local.starts_with('"') && local.contains("..");
    if too_long || double_dot || !LOCAL.is_match(local) {
        return Err(ErrorKind::InvalidEmail.into());
    }
    self.parse_host(host)
}
/// Chooses a parser from the shape of `string`: anything containing `://` is
/// treated as a URL, otherwise anything containing `@` as an email address, and
/// everything else as a bare host.
pub fn parse_str(&self, string: &str) -> Result<Host> {
    if string.contains("://") {
        return self.parse_url(string);
    }
    if string.contains("@") {
        return self.parse_email(string);
    }
    self.parse_host(string)
}
/// Parses a DNS name, attaching domain information when a valid root exists.
///
/// The name is parsed without the syntax check. The resulting domain is kept
/// only if it has a registrable root and that root passes
/// `Domain::has_valid_syntax`; its `full` field is then replaced by the root.
///
/// # Errors
/// Returns `InvalidDomain` (chained onto the underlying error) when the name
/// cannot be converted to ASCII.
pub fn parse_dns_name(&self, name: &str) -> Result<DnsName> {
    let ascii = Domain::to_ascii(name)
        .chain_err(|| ErrorKind::InvalidDomain(name.into()))?;
    let domain = Domain::parse(name, self, false)
        .ok()
        .and_then(|mut domain| {
            let root = match domain.root() {
                Some(root) => root.to_string(),
                None => return None,
            };
            if !Domain::has_valid_syntax(&root) {
                return None;
            }
            domain.full = root;
            Some(domain)
        });
    Ok(DnsName {
        name: ascii,
        domain,
    })
}
}
impl Host {
    /// Parses `host` as a domain first and as an IP address second.
    ///
    /// A value wrapped in exactly one pair of square brackets has the brackets
    /// removed before the IP attempt.
    ///
    /// # Errors
    /// Returns `InvalidHost` when neither interpretation succeeds.
    fn parse(host: &str, list: &List) -> Result<Host> {
        if let Ok(domain) = Domain::parse(host, list, true) {
            return Ok(Host::Domain(domain));
        }
        let bracketed = host.starts_with("[")
            && host.ends_with("]")
            && !host.starts_with("[[")
            && !host.ends_with("]]");
        // With a single bracket on each side, dropping one byte per side is all
        // that is needed.
        let candidate = if bracketed {
            &host[1..host.len() - 1]
        } else {
            host
        };
        match IpAddr::from_str(candidate) {
            Ok(ip) => Ok(Host::Ip(ip)),
            Err(_) => Err(ErrorKind::InvalidHost.into()),
        }
    }

    /// Returns `true` when the host is an IP address.
    pub fn is_ip(&self) -> bool {
        match *self {
            Host::Ip(_) => true,
            Host::Domain(_) => false,
        }
    }

    /// Returns `true` when the host is a domain name.
    pub fn is_domain(&self) -> bool {
        match *self {
            Host::Domain(_) => true,
            Host::Ip(_) => false,
        }
    }
}
/// Writes the wrapped IP address or domain using its own `Display` output.
impl fmt::Display for Host {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            Host::Domain(ref domain) => write!(f, "{}", domain),
            Host::Ip(ref ip) => write!(f, "{}", ip),
        }
    }
}
impl Domain {
/// Checks whether `domain` is a syntactically valid domain name.
///
/// The name must not start with a dot, must convert to ASCII via
/// `Self::to_ascii`, must have at most 127 labels (a single trailing dot is
/// ignored), must not end in an all-numeric label, and every label must match
/// one of the `LABEL` patterns.
pub fn has_valid_syntax(domain: &str) -> bool {
    if domain.starts_with('.') {
        return false;
    }
    let domain = match Self::to_ascii(domain) {
        Ok(ascii) => ascii,
        Err(_) => return false,
    };
    let mut labels: Vec<&str> = domain.split('.').collect();
    // A trailing dot produces one empty label at the end; drop it.
    if domain.ends_with('.') {
        labels.pop();
    }
    if labels.len() > 127 {
        return false;
    }
    // The top-level label must not be a number. Only all-digit labels are
    // rejected: parsing as `f64` would also reject words such as `nan`, `inf`
    // or `1e5`, which are not numeric labels.
    if let Some(tld) = labels.last() {
        if tld.chars().all(|c| c.is_ascii_digit()) {
            return false;
        }
    }
    labels.iter().all(|label| LABEL.is_match(label))
}
/// Returns the stored `full` string of this domain.
pub fn full(&self) -> &str {
    self.full.as_str()
}
/// Returns the last `s_len` labels of `input`, lowercased and joined with dots.
///
/// Trailing dots of `input` are ignored. If `input` has fewer than `s_len`
/// labels, all of its labels are returned instead of panicking on an
/// out-of-range slice.
fn assemble(input: &str, s_len: usize) -> String {
    let domain = input.to_lowercase();
    let labels: Vec<&str> = domain.trim_end_matches('.').split('.').collect();
    // Clamp so that a request for more labels than exist yields the whole name.
    let start = labels.len().saturating_sub(s_len);
    labels[start..].join(".")
}
/// Finds the longest rule of `list` that matches `domain` and builds the
/// resulting `Domain`.
///
/// `domain` is walked label by label from the right. At each step an exact
/// child is preferred over a `*` child; the walk stops at the first label with
/// neither. The deepest node that carries a leaf wins. An exception rule
/// shortens the suffix by one label. The suffix and registrable strings are
/// assembled from `input`, while the label count is taken from `domain`.
///
/// NOTE(review): this assumes `input` and `domain` split into the same number of
/// labels; confirm that the Unicode conversion in the caller cannot change it.
///
/// When no rule matches, a `Domain` without type, suffix and registrable part is
/// returned.
fn find_match(input: &str, domain: &str, list: &List) -> Result<Domain> {
    let mut longest_valid = None;
    let mut current = &list.root;
    let mut s_labels_len = 0;
    for label in domain.rsplit('.') {
        if let Some(child) = current.children.get(label) {
            current = child;
            s_labels_len += 1;
        } else if let Some(child) = current.children.get("*") {
            current = child;
            s_labels_len += 1;
        } else {
            break;
        }
        // Remember the deepest node at which a rule ends.
        if let Some(list_leaf) = &current.leaf {
            longest_valid = Some((list_leaf, s_labels_len));
        }
    }
    match longest_valid {
        Some((leaf, suffix_len)) => {
            let typ = Some(leaf.typ);
            // An exception rule's suffix is one label shorter than the rule.
            let suffix_len = if leaf.is_exception_rule {
                suffix_len - 1
            } else {
                suffix_len
            };
            let suffix = Some(Self::assemble(input, suffix_len));
            let d_labels_len = domain.match_indices(".").count() + 1;
            // A registrable part needs at least one label beyond the suffix.
            let registrable = if d_labels_len > suffix_len {
                Some(Self::assemble(input, suffix_len + 1))
            } else {
                None
            };
            Ok(Domain {
                full: input.to_owned(),
                typ,
                suffix,
                registrable,
            })
        }
        None => Ok(Domain {
            full: input.to_owned(),
            typ: None,
            suffix: None,
            registrable: None,
        }),
    }
}
fn to_ascii(domain: &str) -> Result<String> {
let result = idna::Config::default()
.transitional_processing(true)
.verify_dns_length(true)
.to_ascii(domain);
result.map_err(|error| ErrorKind::Uts46(error).into())
}
/// Parses `domain` against `list`.
///
/// With `check_syntax` set, the name must pass `has_valid_syntax` first.
/// Trailing dots are trimmed, the remainder is converted to Unicode, and the
/// Unicode form is matched against the list.
///
/// # Errors
/// Returns `InvalidDomain` when the syntax check fails and `Uts46` when the
/// Unicode conversion reports errors.
fn parse(domain: &str, list: &List, check_syntax: bool) -> Result<Domain> {
    if check_syntax && !Self::has_valid_syntax(domain) {
        return Err(ErrorKind::InvalidDomain(domain.into()).into());
    }
    let input = domain.trim_end_matches('.');
    match domain_to_unicode(input) {
        (unicode, Ok(_)) => Self::find_match(input, &unicode, list),
        (_, Err(errors)) => Err(ErrorKind::Uts46(errors).into()),
    }
}
pub fn root(&self) -> Option<&str> {
match self.registrable {
Some(ref registrable) => Some(registrable),
None => None,
}
}
pub fn suffix(&self) -> Option<&str> {
match self.suffix {
Some(ref suffix) => Some(suffix),
None => None,
}
}
/// Returns `true` when the matching rule came from the private section.
pub fn is_private(&self) -> bool {
    self.typ == Some(Type::Private)
}
/// Returns `true` when the matching rule came from the ICANN section.
pub fn is_icann(&self) -> bool {
    self.typ == Some(Type::Icann)
}
/// Returns `true` when any rule of the list matched this domain.
pub fn has_known_suffix(&self) -> bool {
    match self.typ {
        Some(_) => true,
        None => false,
    }
}
}
/// Writes the full name lowercased and without trailing dots.
impl fmt::Display for Domain {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let shown = self.full.trim_end_matches(".").to_lowercase();
        write!(f, "{}", shown)
    }
}
impl DnsName {
    /// Returns the domain information attached to this name, if any.
    pub fn domain(&self) -> Option<&Domain> {
        match self.domain {
            Some(ref domain) => Some(domain),
            None => None,
        }
    }
}
/// Formats the stored name, forwarding the caller's formatter (and its flags)
/// to the string's own `Display` implementation.
impl fmt::Display for DnsName {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(&self.name, f)
    }
}