@ -1,7 +1,9 @@
use std ::{
collections ::HashMap ,
fs ::{ create_dir_all , remove_file , symlink_metadata , File } ,
io ::prelude ::* ,
net ::{ IpAddr , ToSocketAddrs } ,
sync ::RwLock ,
time ::{ Duration , SystemTime } ,
} ;
@ -28,20 +30,48 @@ static CLIENT: Lazy<Client> = Lazy::new(|| {
. unwrap ( )
} ) ;
static ICON_REL_REGEX : Lazy < Regex > = Lazy ::new ( | | Regex ::new ( r"icon$|apple.*icon" ) . unwrap ( ) ) ;
static ICON_HREF_REGEX : Lazy < Regex > =
Lazy ::new ( | | Regex ::new ( r"(?i)\w+\.(jpg|jpeg|png|ico)(\?.*)?$|^data:image.*base64" ) . unwrap ( ) ) ;
// Build Regex only once since this takes a lot of time.
static ICON_REL_REGEX : Lazy < Regex > = Lazy ::new ( | | Regex ::new ( r"(?i)icon$|apple.*icon" ) . unwrap ( ) ) ;
static ICON_SIZE_REGEX : Lazy < Regex > = Lazy ::new ( | | Regex ::new ( r"(?x)(\d+)\D*(\d+)" ) . unwrap ( ) ) ;
// Special HashMap which holds the user defined Regex to speedup matching the regex.
static ICON_BLACKLIST_REGEX : Lazy < RwLock < HashMap < String , Regex > > > = Lazy ::new ( | | RwLock ::new ( HashMap ::new ( ) ) ) ;
#[ get( " /<domain>/icon.png " ) ]
fn icon ( domain : String ) -> Option < Cached < Content < Vec < u8 > > > > {
if ! is_valid_domain ( & domain ) {
warn ! ( "Invalid domain: {}" , domain ) ;
return None ;
}
get_icon ( & domain ) . map ( | icon | Cached ::long ( Content ( ContentType ::new ( "image" , "x-icon" ) , icon ) ) )
}
/// Returns if the domain provided is valid or not.
///
/// This does some manual checks and makes use of Url to do some basic checking.
/// domains can't be larger then 63 characters (not counting multiple subdomains) according to the RFC's, but we limit the total size to 255.
fn is_valid_domain ( domain : & str ) -> bool {
// Don't allow empty or too big domains or path traversal
if domain . is_empty ( ) | | domain . len ( ) > 255 | | domain . contains ( ".." ) {
// If parsing the domain fails using Url, it will not work with reqwest.
if let Err ( parse_error ) = Url ::parse ( format ! ( "https://{}" , domain ) . as_str ( ) ) {
debug ! ( "Domain parse error: '{}' - {:?}" , domain , parse_error ) ;
return false ;
} else if domain . is_empty ( )
| | domain . contains ( ".." )
| | domain . starts_with ( '.' )
| | domain . starts_with ( '-' )
| | domain . ends_with ( '-' )
{
debug ! ( "Domain validation error: '{}' is either empty, contains '..', starts with an '.', starts or ends with a '-'" , domain ) ;
return false ;
} else if domain . len ( ) > 255 {
debug ! ( "Domain validation error: '{}' exceeds 255 characters" , domain ) ;
return false ;
}
// Only alphanumeric or specific characters
for c in domain . chars ( ) {
if ! c . is_alphanumeric ( ) & & ! ALLOWED_CHARS . contains ( c ) {
debug ! ( "Domain validation error: '{}' contains an invalid character '{}'" , domain , c ) ;
return false ;
}
}
@ -49,21 +79,10 @@ fn is_valid_domain(domain: &str) -> bool {
true
}
#[ get( " /<domain>/icon.png " ) ]
fn icon ( domain : String ) -> Option < Cached < Content < Vec < u8 > > > > {
if ! is_valid_domain ( & domain ) {
warn ! ( "Invalid domain: {:#?}" , domain ) ;
return None ;
}
get_icon ( & domain ) . map ( | icon | {
Cached ::long ( Content ( ContentType ::new ( "image" , "x-icon" ) , icon ) )
} )
}
/// TODO: This is extracted from IpAddr::is_global, which is unstable:
/// https://doc.rust-lang.org/nightly/std/net/enum.IpAddr.html#method.is_global
/// Remove once https://github.com/rust-lang/rust/issues/27709 is merged
#[ allow(clippy::nonminimal_bool) ]
#[ cfg(not(feature = " unstable " )) ]
fn is_global ( ip : IpAddr ) -> bool {
match ip {
@ -159,7 +178,7 @@ mod tests {
}
}
fn check_icon_domain_is _blacklisted( domain : & str ) -> bool {
fn is_domain _blacklisted( domain : & str ) -> bool {
let mut is_blacklisted = CONFIG . icon_blacklist_non_global_ips ( )
& & ( domain , 0 )
. to_socket_addrs ( )
@ -177,7 +196,31 @@ fn check_icon_domain_is_blacklisted(domain: &str) -> bool {
// Skip the regex check if the previous one is true already
if ! is_blacklisted {
if let Some ( blacklist ) = CONFIG . icon_blacklist_regex ( ) {
let regex = Regex ::new ( & blacklist ) . expect ( "Valid Regex" ) ;
let mut regex_hashmap = ICON_BLACKLIST_REGEX . read ( ) . unwrap ( ) ;
// Use the pre-generate Regex stored in a Lazy HashMap if there's one, else generate it.
let regex = if let Some ( regex ) = regex_hashmap . get ( & blacklist ) {
regex
} else {
drop ( regex_hashmap ) ;
let mut regex_hashmap_write = ICON_BLACKLIST_REGEX . write ( ) . unwrap ( ) ;
// Clear the current list if the previous key doesn't exists.
// To prevent growing of the HashMap after someone has changed it via the admin interface.
if regex_hashmap_write . len ( ) > = 1 {
regex_hashmap_write . clear ( ) ;
}
// Generate the regex to store in too the Lazy Static HashMap.
let blacklist_regex = Regex ::new ( & blacklist ) . unwrap ( ) ;
regex_hashmap_write . insert ( blacklist . to_string ( ) , blacklist_regex ) ;
drop ( regex_hashmap_write ) ;
regex_hashmap = ICON_BLACKLIST_REGEX . read ( ) . unwrap ( ) ;
regex_hashmap . get ( & blacklist ) . unwrap ( )
} ;
// Use the pre-generate Regex stored in a Lazy HashMap.
if regex . is_match ( & domain ) {
warn ! ( "Blacklisted domain: {:#?} matched {:#?}" , domain , blacklist ) ;
is_blacklisted = true ;
@ -213,8 +256,7 @@ fn get_icon(domain: &str) -> Option<Vec<u8>> {
Err ( e ) = > {
error ! ( "Error downloading icon: {:?}" , e ) ;
let miss_indicator = path + ".miss" ;
let empty_icon = Vec ::new ( ) ;
save_icon ( & miss_indicator , & empty_icon ) ;
save_icon ( & miss_indicator , & [ ] ) ;
None
}
}
@ -307,11 +349,51 @@ fn get_icon_url(domain: &str) -> Result<(Vec<Icon>, String), Error> {
// Some sites have extra security in place with for example XSRF Tokens.
let mut cookie_str = String ::new ( ) ;
let resp = get_page ( & ssldomain ) . or_else ( | _ | get_page ( & httpdomain ) ) ;
// First check the domain as given during the request for both HTTPS and HTTP.
let resp = match get_page ( & ssldomain ) . or_else ( | _ | get_page ( & httpdomain ) ) {
Ok ( c ) = > Ok ( c ) ,
Err ( e ) = > {
let mut sub_resp = Err ( e ) ;
// When the domain is not an IP, and has more then one dot, remove all subdomains.
let is_ip = domain . parse ::< IpAddr > ( ) ;
if is_ip . is_err ( ) & & domain . matches ( '.' ) . count ( ) > 1 {
let mut domain_parts = domain . split ( '.' ) ;
let base_domain = format ! (
"{base}.{tld}" ,
tld = domain_parts . next_back ( ) . unwrap ( ) ,
base = domain_parts . next_back ( ) . unwrap ( )
) ;
if is_valid_domain ( & base_domain ) {
let sslbase = format ! ( "https://{}" , base_domain ) ;
let httpbase = format ! ( "http://{}" , base_domain ) ;
debug ! ( "[get_icon_url]: Trying without subdomains '{}'" , base_domain ) ;
sub_resp = get_page ( & sslbase ) . or_else ( | _ | get_page ( & httpbase ) ) ;
}
// When the domain is not an IP, and has less then 2 dots, try to add www. infront of it.
} else if is_ip . is_err ( ) & & domain . matches ( '.' ) . count ( ) < 2 {
let www_domain = format ! ( "www.{}" , domain ) ;
if is_valid_domain ( & www_domain ) {
let sslwww = format ! ( "https://{}" , www_domain ) ;
let httpwww = format ! ( "http://{}" , www_domain ) ;
debug ! ( "[get_icon_url]: Trying with www. prefix '{}'" , www_domain ) ;
sub_resp = get_page ( & sslwww ) . or_else ( | _ | get_page ( & httpwww ) ) ;
}
}
sub_resp
}
} ;
if let Ok ( content ) = resp {
// Extract the URL from the respose in case redirects occured (like @ gitlab.com)
let url = content . url ( ) . clone ( ) ;
// Get all the cookies and pass it on to the next function.
// Needed for XSRF Cookies for example (like @ mijn.ing.nl)
let raw_cookies = content . headers ( ) . get_all ( "set-cookie" ) ;
cookie_str = raw_cookies
. iter ( )
@ -337,14 +419,18 @@ fn get_icon_url(domain: &str) -> Result<(Vec<Icon>, String), Error> {
let favicons = soup
. tag ( "link" )
. attr ( "rel" , ICON_REL_REGEX . clone ( ) ) // Only use icon rels
. attr ( "href" , ICON_HREF_REGEX . clone ( ) ) // Only allow specific extensions
. attr_name ( "href" ) // Make sure there is a href
. find_all ( ) ;
// Loop through all the found icons and determine it's priority
for favicon in favicons {
let sizes = favicon . get ( "sizes" ) ;
let href = favicon . get ( "href" ) . expect ( "Missing href" ) ;
let full_href = url . join ( & href ) . unwrap ( ) . into_string ( ) ;
let href = favicon . get ( "href" ) . unwrap ( ) ;
// Skip invalid url's
let full_href = match url . join ( & href ) {
Ok ( h ) = > h . into_string ( ) ,
_ = > continue ,
} ;
let priority = get_icon_priority ( & full_href , sizes ) ;
@ -368,20 +454,18 @@ fn get_page(url: &str) -> Result<Response, Error> {
}
fn get_page_with_cookies ( url : & str , cookie_str : & str ) -> Result < Response , Error > {
if check_icon_domain_is _blacklisted( Url ::parse ( url ) . unwrap ( ) . host_str ( ) . unwrap_or_default ( ) ) {
err ! ( "Favicon rel linked to a non blacklisted domain!" ) ;
if is_domain _blacklisted( Url ::parse ( url ) . unwrap ( ) . host_str ( ) . unwrap_or_default ( ) ) {
err ! ( "Favicon rel linked to a blacklisted domain!" ) ;
}
if cookie_str . is_empty ( ) {
CLIENT . get ( url ) . send ( ) ? . error_for_status ( ) . map_err ( Into ::into )
} else {
CLIENT
. get ( url )
. header ( "cookie" , cookie_str )
. send ( ) ?
. error_for_status ( )
. map_err ( Into ::into )
let mut client = CLIENT . get ( url ) ;
if ! cookie_str . is_empty ( ) {
client = client . header ( "cookie" , cookie_str )
}
client . send ( ) ?
. error_for_status ( )
. map_err ( Into ::into )
}
/// Returns a Integer with the priority of the type of the icon which to prefer.
@ -464,7 +548,7 @@ fn parse_sizes(sizes: Option<String>) -> (u16, u16) {
}
fn download_icon ( domain : & str ) -> Result < Vec < u8 > , Error > {
if check_icon_domain_is _blacklisted( domain ) {
if is_domain _blacklisted( domain ) {
err ! ( "Domain is blacklisted" , domain )
}
@ -495,7 +579,7 @@ fn download_icon(domain: &str) -> Result<Vec<u8>, Error> {
res . copy_to ( & mut buffer ) ? ;
break ;
}
Err ( _ ) = > info ! ( "Download failed for {}" , icon . href ) ,
Err ( _ ) = > warn ! ( "Download failed for {}" , icon . href ) ,
} ;
}
}