|
|
@ -1,7 +1,9 @@ |
|
|
|
use std::{ |
|
|
|
collections::HashMap, |
|
|
|
fs::{create_dir_all, remove_file, symlink_metadata, File}, |
|
|
|
io::prelude::*, |
|
|
|
net::{IpAddr, ToSocketAddrs}, |
|
|
|
sync::RwLock, |
|
|
|
time::{Duration, SystemTime}, |
|
|
|
}; |
|
|
|
|
|
|
@ -28,20 +30,48 @@ static CLIENT: Lazy<Client> = Lazy::new(|| { |
|
|
|
.unwrap() |
|
|
|
}); |
|
|
|
|
|
|
|
static ICON_REL_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"icon$|apple.*icon").unwrap()); |
|
|
|
static ICON_HREF_REGEX: Lazy<Regex> = |
|
|
|
Lazy::new(|| Regex::new(r"(?i)\w+\.(jpg|jpeg|png|ico)(\?.*)?$|^data:image.*base64").unwrap()); |
|
|
|
// Build Regex only once since this takes a lot of time.
|
|
|
|
static ICON_REL_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)icon$|apple.*icon").unwrap()); |
|
|
|
static ICON_SIZE_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?x)(\d+)\D*(\d+)").unwrap()); |
|
|
|
|
|
|
|
// Special HashMap which holds the user defined Regex to speedup matching the regex.
|
|
|
|
static ICON_BLACKLIST_REGEX: Lazy<RwLock<HashMap<String, Regex>>> = Lazy::new(|| RwLock::new(HashMap::new())); |
|
|
|
|
|
|
|
#[get("/<domain>/icon.png")] |
|
|
|
fn icon(domain: String) -> Option<Cached<Content<Vec<u8>>>> { |
|
|
|
if !is_valid_domain(&domain) { |
|
|
|
warn!("Invalid domain: {}", domain); |
|
|
|
return None; |
|
|
|
} |
|
|
|
|
|
|
|
get_icon(&domain).map(|icon| Cached::long(Content(ContentType::new("image", "x-icon"), icon))) |
|
|
|
} |
|
|
|
|
|
|
|
/// Returns if the domain provided is valid or not.
|
|
|
|
///
|
|
|
|
/// This does some manual checks and makes use of Url to do some basic checking.
|
|
|
|
/// domains can't be larger then 63 characters (not counting multiple subdomains) according to the RFC's, but we limit the total size to 255.
|
|
|
|
fn is_valid_domain(domain: &str) -> bool { |
|
|
|
// Don't allow empty or too big domains or path traversal
|
|
|
|
if domain.is_empty() || domain.len() > 255 || domain.contains("..") { |
|
|
|
// If parsing the domain fails using Url, it will not work with reqwest.
|
|
|
|
if let Err(parse_error) = Url::parse(format!("https://{}", domain).as_str()) { |
|
|
|
debug!("Domain parse error: '{}' - {:?}", domain, parse_error); |
|
|
|
return false; |
|
|
|
} else if domain.is_empty() |
|
|
|
|| domain.contains("..") |
|
|
|
|| domain.starts_with('.') |
|
|
|
|| domain.starts_with('-') |
|
|
|
|| domain.ends_with('-') |
|
|
|
{ |
|
|
|
debug!("Domain validation error: '{}' is either empty, contains '..', starts with an '.', starts or ends with a '-'", domain); |
|
|
|
return false; |
|
|
|
} else if domain.len() > 255 { |
|
|
|
debug!("Domain validation error: '{}' exceeds 255 characters", domain); |
|
|
|
return false; |
|
|
|
} |
|
|
|
|
|
|
|
// Only alphanumeric or specific characters
|
|
|
|
for c in domain.chars() { |
|
|
|
if !c.is_alphanumeric() && !ALLOWED_CHARS.contains(c) { |
|
|
|
debug!("Domain validation error: '{}' contains an invalid character '{}'", domain, c); |
|
|
|
return false; |
|
|
|
} |
|
|
|
} |
|
|
@ -49,21 +79,10 @@ fn is_valid_domain(domain: &str) -> bool { |
|
|
|
true |
|
|
|
} |
|
|
|
|
|
|
|
#[get("/<domain>/icon.png")] |
|
|
|
fn icon(domain: String) -> Option<Cached<Content<Vec<u8>>>> { |
|
|
|
if !is_valid_domain(&domain) { |
|
|
|
warn!("Invalid domain: {:#?}", domain); |
|
|
|
return None; |
|
|
|
} |
|
|
|
|
|
|
|
get_icon(&domain).map(|icon| { |
|
|
|
Cached::long(Content(ContentType::new("image", "x-icon"), icon)) |
|
|
|
}) |
|
|
|
} |
|
|
|
|
|
|
|
/// TODO: This is extracted from IpAddr::is_global, which is unstable:
|
|
|
|
/// https://doc.rust-lang.org/nightly/std/net/enum.IpAddr.html#method.is_global
|
|
|
|
/// Remove once https://github.com/rust-lang/rust/issues/27709 is merged
|
|
|
|
#[allow(clippy::nonminimal_bool)] |
|
|
|
#[cfg(not(feature = "unstable"))] |
|
|
|
fn is_global(ip: IpAddr) -> bool { |
|
|
|
match ip { |
|
|
@ -159,7 +178,7 @@ mod tests { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
fn check_icon_domain_is_blacklisted(domain: &str) -> bool { |
|
|
|
fn is_domain_blacklisted(domain: &str) -> bool { |
|
|
|
let mut is_blacklisted = CONFIG.icon_blacklist_non_global_ips() |
|
|
|
&& (domain, 0) |
|
|
|
.to_socket_addrs() |
|
|
@ -177,7 +196,31 @@ fn check_icon_domain_is_blacklisted(domain: &str) -> bool { |
|
|
|
// Skip the regex check if the previous one is true already
|
|
|
|
if !is_blacklisted { |
|
|
|
if let Some(blacklist) = CONFIG.icon_blacklist_regex() { |
|
|
|
let regex = Regex::new(&blacklist).expect("Valid Regex"); |
|
|
|
let mut regex_hashmap = ICON_BLACKLIST_REGEX.read().unwrap(); |
|
|
|
|
|
|
|
// Use the pre-generate Regex stored in a Lazy HashMap if there's one, else generate it.
|
|
|
|
let regex = if let Some(regex) = regex_hashmap.get(&blacklist) { |
|
|
|
regex |
|
|
|
} else { |
|
|
|
drop(regex_hashmap); |
|
|
|
|
|
|
|
let mut regex_hashmap_write = ICON_BLACKLIST_REGEX.write().unwrap(); |
|
|
|
// Clear the current list if the previous key doesn't exists.
|
|
|
|
// To prevent growing of the HashMap after someone has changed it via the admin interface.
|
|
|
|
if regex_hashmap_write.len() >= 1 { |
|
|
|
regex_hashmap_write.clear(); |
|
|
|
} |
|
|
|
|
|
|
|
// Generate the regex to store in too the Lazy Static HashMap.
|
|
|
|
let blacklist_regex = Regex::new(&blacklist).unwrap(); |
|
|
|
regex_hashmap_write.insert(blacklist.to_string(), blacklist_regex); |
|
|
|
drop(regex_hashmap_write); |
|
|
|
|
|
|
|
regex_hashmap = ICON_BLACKLIST_REGEX.read().unwrap(); |
|
|
|
regex_hashmap.get(&blacklist).unwrap() |
|
|
|
}; |
|
|
|
|
|
|
|
// Use the pre-generate Regex stored in a Lazy HashMap.
|
|
|
|
if regex.is_match(&domain) { |
|
|
|
warn!("Blacklisted domain: {:#?} matched {:#?}", domain, blacklist); |
|
|
|
is_blacklisted = true; |
|
|
@ -213,8 +256,7 @@ fn get_icon(domain: &str) -> Option<Vec<u8>> { |
|
|
|
Err(e) => { |
|
|
|
error!("Error downloading icon: {:?}", e); |
|
|
|
let miss_indicator = path + ".miss"; |
|
|
|
let empty_icon = Vec::new(); |
|
|
|
save_icon(&miss_indicator, &empty_icon); |
|
|
|
save_icon(&miss_indicator, &[]); |
|
|
|
None |
|
|
|
} |
|
|
|
} |
|
|
@ -307,11 +349,51 @@ fn get_icon_url(domain: &str) -> Result<(Vec<Icon>, String), Error> { |
|
|
|
// Some sites have extra security in place with for example XSRF Tokens.
|
|
|
|
let mut cookie_str = String::new(); |
|
|
|
|
|
|
|
let resp = get_page(&ssldomain).or_else(|_| get_page(&httpdomain)); |
|
|
|
// First check the domain as given during the request for both HTTPS and HTTP.
|
|
|
|
let resp = match get_page(&ssldomain).or_else(|_| get_page(&httpdomain)) { |
|
|
|
Ok(c) => Ok(c), |
|
|
|
Err(e) => { |
|
|
|
let mut sub_resp = Err(e); |
|
|
|
|
|
|
|
// When the domain is not an IP, and has more then one dot, remove all subdomains.
|
|
|
|
let is_ip = domain.parse::<IpAddr>(); |
|
|
|
if is_ip.is_err() && domain.matches('.').count() > 1 { |
|
|
|
let mut domain_parts = domain.split('.'); |
|
|
|
let base_domain = format!( |
|
|
|
"{base}.{tld}", |
|
|
|
tld = domain_parts.next_back().unwrap(), |
|
|
|
base = domain_parts.next_back().unwrap() |
|
|
|
); |
|
|
|
if is_valid_domain(&base_domain) { |
|
|
|
let sslbase = format!("https://{}", base_domain); |
|
|
|
let httpbase = format!("http://{}", base_domain); |
|
|
|
debug!("[get_icon_url]: Trying without subdomains '{}'", base_domain); |
|
|
|
|
|
|
|
sub_resp = get_page(&sslbase).or_else(|_| get_page(&httpbase)); |
|
|
|
} |
|
|
|
|
|
|
|
// When the domain is not an IP, and has less then 2 dots, try to add www. infront of it.
|
|
|
|
} else if is_ip.is_err() && domain.matches('.').count() < 2 { |
|
|
|
let www_domain = format!("www.{}", domain); |
|
|
|
if is_valid_domain(&www_domain) { |
|
|
|
let sslwww = format!("https://{}", www_domain); |
|
|
|
let httpwww = format!("http://{}", www_domain); |
|
|
|
debug!("[get_icon_url]: Trying with www. prefix '{}'", www_domain); |
|
|
|
|
|
|
|
sub_resp = get_page(&sslwww).or_else(|_| get_page(&httpwww)); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
sub_resp |
|
|
|
} |
|
|
|
}; |
|
|
|
|
|
|
|
if let Ok(content) = resp { |
|
|
|
// Extract the URL from the respose in case redirects occured (like @ gitlab.com)
|
|
|
|
let url = content.url().clone(); |
|
|
|
|
|
|
|
// Get all the cookies and pass it on to the next function.
|
|
|
|
// Needed for XSRF Cookies for example (like @ mijn.ing.nl)
|
|
|
|
let raw_cookies = content.headers().get_all("set-cookie"); |
|
|
|
cookie_str = raw_cookies |
|
|
|
.iter() |
|
|
@ -337,14 +419,18 @@ fn get_icon_url(domain: &str) -> Result<(Vec<Icon>, String), Error> { |
|
|
|
let favicons = soup |
|
|
|
.tag("link") |
|
|
|
.attr("rel", ICON_REL_REGEX.clone()) // Only use icon rels
|
|
|
|
.attr("href", ICON_HREF_REGEX.clone()) // Only allow specific extensions
|
|
|
|
.attr_name("href") // Make sure there is a href
|
|
|
|
.find_all(); |
|
|
|
|
|
|
|
// Loop through all the found icons and determine it's priority
|
|
|
|
for favicon in favicons { |
|
|
|
let sizes = favicon.get("sizes"); |
|
|
|
let href = favicon.get("href").expect("Missing href"); |
|
|
|
let full_href = url.join(&href).unwrap().into_string(); |
|
|
|
let href = favicon.get("href").unwrap(); |
|
|
|
// Skip invalid url's
|
|
|
|
let full_href = match url.join(&href) { |
|
|
|
Ok(h) => h.into_string(), |
|
|
|
_ => continue, |
|
|
|
}; |
|
|
|
|
|
|
|
let priority = get_icon_priority(&full_href, sizes); |
|
|
|
|
|
|
@ -368,21 +454,19 @@ fn get_page(url: &str) -> Result<Response, Error> { |
|
|
|
} |
|
|
|
|
|
|
|
fn get_page_with_cookies(url: &str, cookie_str: &str) -> Result<Response, Error> { |
|
|
|
if check_icon_domain_is_blacklisted(Url::parse(url).unwrap().host_str().unwrap_or_default()) { |
|
|
|
err!("Favicon rel linked to a non blacklisted domain!"); |
|
|
|
if is_domain_blacklisted(Url::parse(url).unwrap().host_str().unwrap_or_default()) { |
|
|
|
err!("Favicon rel linked to a blacklisted domain!"); |
|
|
|
} |
|
|
|
|
|
|
|
if cookie_str.is_empty() { |
|
|
|
CLIENT.get(url).send()?.error_for_status().map_err(Into::into) |
|
|
|
} else { |
|
|
|
CLIENT |
|
|
|
.get(url) |
|
|
|
.header("cookie", cookie_str) |
|
|
|
.send()? |
|
|
|
let mut client = CLIENT.get(url); |
|
|
|
if !cookie_str.is_empty() { |
|
|
|
client = client.header("cookie", cookie_str) |
|
|
|
} |
|
|
|
|
|
|
|
client.send()? |
|
|
|
.error_for_status() |
|
|
|
.map_err(Into::into) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
/// Returns a Integer with the priority of the type of the icon which to prefer.
|
|
|
|
/// The lower the number the better.
|
|
|
@ -464,7 +548,7 @@ fn parse_sizes(sizes: Option<String>) -> (u16, u16) { |
|
|
|
} |
|
|
|
|
|
|
|
fn download_icon(domain: &str) -> Result<Vec<u8>, Error> { |
|
|
|
if check_icon_domain_is_blacklisted(domain) { |
|
|
|
if is_domain_blacklisted(domain) { |
|
|
|
err!("Domain is blacklisted", domain) |
|
|
|
} |
|
|
|
|
|
|
@ -495,7 +579,7 @@ fn download_icon(domain: &str) -> Result<Vec<u8>, Error> { |
|
|
|
res.copy_to(&mut buffer)?; |
|
|
|
break; |
|
|
|
} |
|
|
|
Err(_) => info!("Download failed for {}", icon.href), |
|
|
|
Err(_) => warn!("Download failed for {}", icon.href), |
|
|
|
}; |
|
|
|
} |
|
|
|
} |
|
|
|