From a8c56ecd5e9e7c7d37d00c814c864c3bc8b32694 Mon Sep 17 00:00:00 2001 From: Michel Fortin Date: Sun, 10 Aug 2014 15:25:52 -0400 Subject: [PATCH] Adding `url_filter_func` configuration variable. All URLs are now passed through the `encodeURLAttribute` function, which applies the filter as necessary. If the filter function changes the URL of an automatic link, this is reflected in the text of the link too. Inline- and reference-style email links now benefit from the entity obfuscation since `mailto:` is now detected in `encodeURLAttribute` and it triggers the entity obfuscation. Fixes #85. --- Michelf/Markdown.php | 107 +++++++++++++++++++++++++++---------------- 1 file changed, 67 insertions(+), 40 deletions(-) diff --git a/Michelf/Markdown.php b/Michelf/Markdown.php index 0d37012..c5245fd 100644 --- a/Michelf/Markdown.php +++ b/Michelf/Markdown.php @@ -59,6 +59,9 @@ class Markdown implements MarkdownInterface { public $predef_urls = array(); public $predef_titles = array(); + # Optional filter function for URLs + public $url_filter_func = null; + ### Parser Implementation ### @@ -593,7 +596,7 @@ class Markdown implements MarkdownInterface { if (isset($this->urls[$link_id])) { $url = $this->urls[$link_id]; - $url = $this->encodeAttribute($url); + $url = $this->encodeURLAttribute($url); $result = "titles[$link_id] ) ) { @@ -623,7 +626,7 @@ class Markdown implements MarkdownInterface { if ($unhashed != $url) $url = preg_replace('/^<(.*)>$/', '\1', $unhashed); - $url = $this->encodeAttribute($url); + $url = $this->encodeURLAttribute($url); $result = "encodeAttribute($alt_text); if (isset($this->urls[$link_id])) { - $url = $this->encodeAttribute($this->urls[$link_id]); + $url = $this->encodeURLAttribute($this->urls[$link_id]); $result = "\"$alt_text\"";titles[$link_id])) { $title = $this->titles[$link_id]; @@ -728,7 +731,7 @@ class Markdown implements MarkdownInterface { $title =& $matches[7]; $alt_text = $this->encodeAttribute($alt_text); - $url = 
$this->encodeAttribute($url); + $url = $this->encodeURLAttribute($url); $result = "\"$alt_text\"";encodeAttribute($title); @@ -1260,6 +1263,33 @@ class Markdown implements MarkdownInterface { $text = str_replace('"', '"', $text); return $text; } + + + protected function encodeURLAttribute($url, &$text = null) { + # + # Encode text for a double-quoted HTML attribute containing a URL, + # applying the URL filter if set. Also generates the textual + # representation for the URL (removing mailto: or tel:) storing it in $text. + # This function is *not* suitable for attributes enclosed in single quotes. + # + if ($this->url_filter_func) + $url = call_user_func($this->url_filter_func, $url); + + if (preg_match('{^mailto:}i', $url)) + $url = $this->encodeEntityObfuscatedAttribute($url, $text, 7); + else if (preg_match('{^tel:}i', $url)) + { + $url = $this->encodeAttribute($url); + $text = substr($url, 4); + } + else + { + $url = $this->encodeAttribute($url); + $text = $url; + } + + return $url; + } protected function encodeAmpsAndAngles($text) { @@ -1284,7 +1314,7 @@ class Markdown implements MarkdownInterface { protected function doAutoLinks($text) { - $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}i', + $text = preg_replace_callback('{<((https?|ftp|dict|tel):[^\'">\s]+)>}i', array($this, '_doAutoLinks_url_callback'), $text); # Email addresses: @@ -1307,48 +1337,46 @@ class Markdown implements MarkdownInterface { > }xi', array($this, '_doAutoLinks_email_callback'), $text); - $text = preg_replace_callback('{<(tel:([^\'">\s]+))>}i',array($this, '_doAutoLinks_tel_callback'), $text); return $text; } - protected function _doAutoLinks_tel_callback($matches) { - $url = $this->encodeAttribute($matches[1]); - $tel = $this->encodeAttribute($matches[2]); - $link = "$tel"; - return $this->hashPart($link); - } protected function _doAutoLinks_url_callback($matches) { - $url = $this->encodeAttribute($matches[1]); - $link = "$url"; + $url = 
$this->encodeURLAttribute($matches[1], $text); + $link = "$text"; return $this->hashPart($link); } protected function _doAutoLinks_email_callback($matches) { - $address = $matches[1]; - $link = $this->encodeEmailAddress($address); + $addr = $matches[1]; + $url = $this->encodeURLAttribute("mailto:$addr", $text); + $link = "$text"; return $this->hashPart($link); } - protected function encodeEmailAddress($addr) { + protected function encodeEntityObfuscatedAttribute($text, &$tail = null, $head_length = 0) { # - # Input: an email address, e.g. "foo@example.com" + # Input: some text to obfuscate, e.g. "mailto:foo@example.com" # - # Output: the email address as a mailto link, with each character - # of the address encoded as either a decimal or hex entity, in - # the hopes of foiling most address harvesting spam bots. E.g.: + # Output: the same text but with most characters encoded as either a + # decimal or hex entity, in the hopes of foiling most address + # harvesting spam bots. E.g.: # - #

foo@exampl - # e.com

+ # m + # + # Note: the additional output $tail is assigned the same value as the + # output, minus the number of characters specified by $head_length. # # Based by a filter by Matthew Wickline, posted to BBEdit-Talk. - # With some optimizations by Milian Wolff. + # With some optimizations by Milian Wolff. Forced encoding of HTML + # attribute special characters by Allan Odgaard. # - $addr = "mailto:" . $addr; - $chars = preg_split('/(? $char) { $ord = ord($char); # Ignore non-ascii chars. @@ -1356,18 +1384,17 @@ class Markdown implements MarkdownInterface { $r = ($seed * (1 + $key)) % 100; # Pseudo-random function. # roughly 10% raw, 45% hex, 45% dec # '@' *must* be encoded. I insist. - # '"' has to be encoded inside the attribute - if ($r > 90 && $char != '@' && $char != '"') /* do nothing */; + # '"', '&' and '>' have to be encoded inside the attribute + if ($r > 90 && strpos('@"&>', $char) === false) /* do nothing */; else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';'; else $chars[$key] = '&#'.$ord.';'; } } - - $addr = implode('', $chars); - $text = implode('', array_slice($chars, 7)); # text without `mailto:` - $addr = "$text"; - return $addr; + $text = implode('', $chars); + $tail = $head_length ? 
implode('', array_slice($chars, $head_length)) : $text; + + return $text; } @@ -2296,7 +2323,7 @@ abstract class _MarkdownExtra_TmpImpl extends \Michelf\Markdown { if (isset($this->urls[$link_id])) { $url = $this->urls[$link_id]; - $url = $this->encodeAttribute($url); + $url = $this->encodeURLAttribute($url); $result = "titles[$link_id] ) ) { @@ -2329,7 +2356,7 @@ abstract class _MarkdownExtra_TmpImpl extends \Michelf\Markdown { if ($unhashed != $url) $url = preg_replace('/^<(.*)>$/', '\1', $unhashed); - $url = $this->encodeAttribute($url); + $url = $this->encodeURLAttribute($url); $result = "encodeAttribute($alt_text); if (isset($this->urls[$link_id])) { - $url = $this->encodeAttribute($this->urls[$link_id]); + $url = $this->encodeURLAttribute($this->urls[$link_id]); $result = "\"$alt_text\"";titles[$link_id])) { $title = $this->titles[$link_id]; @@ -2439,7 +2466,7 @@ abstract class _MarkdownExtra_TmpImpl extends \Michelf\Markdown { $attr = $this->doExtraAttributes("img", $dummy =& $matches[8]); $alt_text = $this->encodeAttribute($alt_text); - $url = $this->encodeAttribute($url); + $url = $this->encodeURLAttribute($url); $result = "\"$alt_text\"";encodeAttribute($title);