Adding url_filter_func configuration variable.
All URLs are now passed through the `encodeURLAttribute` function, which applies the filter when one is set. If the filter function changes the URL of an automatic link, the change is reflected in the text of the link too. Inline- and reference-style email links now benefit from entity obfuscation as well, because `encodeURLAttribute` detects the `mailto:` prefix and applies the obfuscation. Fixes #85.
This commit is contained in:
parent
123c43a65c
commit
a8c56ecd5e
1 changed files with 67 additions and 40 deletions
|
|
@ -59,6 +59,9 @@ class Markdown implements MarkdownInterface {
|
|||
public $predef_urls = array();
|
||||
public $predef_titles = array();
|
||||
|
||||
# Optional filter function for URLs
|
||||
public $url_filter_func = null;
|
||||
|
||||
|
||||
### Parser Implementation ###
|
||||
|
||||
|
|
@ -593,7 +596,7 @@ class Markdown implements MarkdownInterface {
|
|||
|
||||
if (isset($this->urls[$link_id])) {
|
||||
$url = $this->urls[$link_id];
|
||||
$url = $this->encodeAttribute($url);
|
||||
$url = $this->encodeURLAttribute($url);
|
||||
|
||||
$result = "<a href=\"$url\"";
|
||||
if ( isset( $this->titles[$link_id] ) ) {
|
||||
|
|
@ -623,7 +626,7 @@ class Markdown implements MarkdownInterface {
|
|||
if ($unhashed != $url)
|
||||
$url = preg_replace('/^<(.*)>$/', '\1', $unhashed);
|
||||
|
||||
$url = $this->encodeAttribute($url);
|
||||
$url = $this->encodeURLAttribute($url);
|
||||
|
||||
$result = "<a href=\"$url\"";
|
||||
if (isset($title)) {
|
||||
|
|
@ -704,7 +707,7 @@ class Markdown implements MarkdownInterface {
|
|||
|
||||
$alt_text = $this->encodeAttribute($alt_text);
|
||||
if (isset($this->urls[$link_id])) {
|
||||
$url = $this->encodeAttribute($this->urls[$link_id]);
|
||||
$url = $this->encodeURLAttribute($this->urls[$link_id]);
|
||||
$result = "<img src=\"$url\" alt=\"$alt_text\"";
|
||||
if (isset($this->titles[$link_id])) {
|
||||
$title = $this->titles[$link_id];
|
||||
|
|
@ -728,7 +731,7 @@ class Markdown implements MarkdownInterface {
|
|||
$title =& $matches[7];
|
||||
|
||||
$alt_text = $this->encodeAttribute($alt_text);
|
||||
$url = $this->encodeAttribute($url);
|
||||
$url = $this->encodeURLAttribute($url);
|
||||
$result = "<img src=\"$url\" alt=\"$alt_text\"";
|
||||
if (isset($title)) {
|
||||
$title = $this->encodeAttribute($title);
|
||||
|
|
@ -1262,6 +1265,33 @@ class Markdown implements MarkdownInterface {
|
|||
}
|
||||
|
||||
|
||||
protected function encodeURLAttribute($url, &$text = null) {
|
||||
#
|
||||
# Encode text for a double-quoted HTML attribute containing a URL,
|
||||
# applying the URL filter if set. Also generates the textual
|
||||
# representation for the URL (removing mailto: or tel:) storing it in $text.
|
||||
# This function is *not* suitable for attributes enclosed in single quotes.
|
||||
#
|
||||
if ($this->url_filter_func)
|
||||
$url = call_user_func($this->url_filter_func, $url);
|
||||
|
||||
if (preg_match('{^mailto:}i', $url))
|
||||
$url = $this->encodeEntityObfuscatedAttribute($url, $text, 7);
|
||||
else if (preg_match('{^tel:}i', $url))
|
||||
{
|
||||
$url = $this->encodeAttribute($url);
|
||||
$text = substr($url, 4);
|
||||
}
|
||||
else
|
||||
{
|
||||
$url = $this->encodeAttribute($url);
|
||||
$text = $url;
|
||||
}
|
||||
|
||||
return $url;
|
||||
}
|
||||
|
||||
|
||||
protected function encodeAmpsAndAngles($text) {
|
||||
#
|
||||
# Smart processing for ampersands and angle brackets that need to
|
||||
|
|
@ -1284,7 +1314,7 @@ class Markdown implements MarkdownInterface {
|
|||
|
||||
|
||||
protected function doAutoLinks($text) {
|
||||
$text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}i',
|
||||
$text = preg_replace_callback('{<((https?|ftp|dict|tel):[^\'">\s]+)>}i',
|
||||
array($this, '_doAutoLinks_url_callback'), $text);
|
||||
|
||||
# Email addresses: <address@domain.foo>
|
||||
|
|
@ -1307,47 +1337,45 @@ class Markdown implements MarkdownInterface {
|
|||
>
|
||||
}xi',
|
||||
array($this, '_doAutoLinks_email_callback'), $text);
|
||||
$text = preg_replace_callback('{<(tel:([^\'">\s]+))>}i',array($this, '_doAutoLinks_tel_callback'), $text);
|
||||
|
||||
return $text;
|
||||
}
|
||||
protected function _doAutoLinks_tel_callback($matches) {
|
||||
$url = $this->encodeAttribute($matches[1]);
|
||||
$tel = $this->encodeAttribute($matches[2]);
|
||||
$link = "<a href=\"$url\">$tel</a>";
|
||||
return $this->hashPart($link);
|
||||
}
|
||||
protected function _doAutoLinks_url_callback($matches) {
|
||||
$url = $this->encodeAttribute($matches[1]);
|
||||
$link = "<a href=\"$url\">$url</a>";
|
||||
$url = $this->encodeURLAttribute($matches[1], $text);
|
||||
$link = "<a href=\"$url\">$text</a>";
|
||||
return $this->hashPart($link);
|
||||
}
|
||||
protected function _doAutoLinks_email_callback($matches) {
|
||||
$address = $matches[1];
|
||||
$link = $this->encodeEmailAddress($address);
|
||||
$addr = $matches[1];
|
||||
$url = $this->encodeURLAttribute("mailto:$addr", $text);
|
||||
$link = "<a href=\"$url\">$text</a>";
|
||||
return $this->hashPart($link);
|
||||
}
|
||||
|
||||
|
||||
protected function encodeEmailAddress($addr) {
|
||||
protected function encodeEntityObfuscatedAttribute($text, &$tail = null, $head_length = 0) {
|
||||
#
|
||||
# Input: an email address, e.g. "foo@example.com"
|
||||
# Input: some text to obfuscate, e.g. "mailto:foo@example.com"
|
||||
#
|
||||
# Output: the email address as a mailto link, with each character
|
||||
# of the address encoded as either a decimal or hex entity, in
|
||||
# the hopes of foiling most address harvesting spam bots. E.g.:
|
||||
# Output: the same text but with most characters encoded as either a
|
||||
# decimal or hex entity, in the hopes of foiling most address
|
||||
# harvesting spam bots. E.g.:
|
||||
#
|
||||
# <p><a href="mailto:foo
|
||||
# mailto:foo
|
||||
# @example.co
|
||||
# m">foo@exampl
|
||||
# e.com</a></p>
|
||||
# m
|
||||
#
|
||||
# Note: the additional output $tail is assigned the same value as the
|
||||
# output, minus the number of characters specified by $head_length.
|
||||
#
|
||||
# Based on a filter by Matthew Wickline, posted to BBEdit-Talk.
|
||||
# With some optimizations by Milian Wolff.
|
||||
# With some optimizations by Milian Wolff. Forced encoding of HTML
|
||||
# attribute special characters by Allan Odgaard.
|
||||
#
|
||||
$addr = "mailto:" . $addr;
|
||||
$chars = preg_split('/(?<!^)(?!$)/', $addr);
|
||||
$seed = (int)abs(crc32($addr) / strlen($addr)); # Deterministic seed.
|
||||
if ($text == "") return $tail = "";
|
||||
|
||||
$chars = preg_split('/(?<!^)(?!$)/', $text);
|
||||
$seed = (int)abs(crc32($text) / strlen($text)); # Deterministic seed.
|
||||
|
||||
foreach ($chars as $key => $char) {
|
||||
$ord = ord($char);
|
||||
|
|
@ -1356,18 +1384,17 @@ class Markdown implements MarkdownInterface {
|
|||
$r = ($seed * (1 + $key)) % 100; # Pseudo-random function.
|
||||
# roughly 10% raw, 45% hex, 45% dec
|
||||
# '@' *must* be encoded. I insist.
|
||||
# '"' has to be encoded inside the attribute
|
||||
if ($r > 90 && $char != '@' && $char != '"') /* do nothing */;
|
||||
# '"', '&' and '>' have to be encoded inside the attribute
|
||||
if ($r > 90 && strpos('@"&>', $char) === false) /* do nothing */;
|
||||
else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';';
|
||||
else $chars[$key] = '&#'.$ord.';';
|
||||
}
|
||||
}
|
||||
|
||||
$addr = implode('', $chars);
|
||||
$text = implode('', array_slice($chars, 7)); # text without `mailto:`
|
||||
$addr = "<a href=\"$addr\">$text</a>";
|
||||
$text = implode('', $chars);
|
||||
$tail = $head_length ? implode('', array_slice($chars, $head_length)) : $text;
|
||||
|
||||
return $addr;
|
||||
return $text;
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -2296,7 +2323,7 @@ abstract class _MarkdownExtra_TmpImpl extends \Michelf\Markdown {
|
|||
|
||||
if (isset($this->urls[$link_id])) {
|
||||
$url = $this->urls[$link_id];
|
||||
$url = $this->encodeAttribute($url);
|
||||
$url = $this->encodeURLAttribute($url);
|
||||
|
||||
$result = "<a href=\"$url\"";
|
||||
if ( isset( $this->titles[$link_id] ) ) {
|
||||
|
|
@ -2329,7 +2356,7 @@ abstract class _MarkdownExtra_TmpImpl extends \Michelf\Markdown {
|
|||
if ($unhashed != $url)
|
||||
$url = preg_replace('/^<(.*)>$/', '\1', $unhashed);
|
||||
|
||||
$url = $this->encodeAttribute($url);
|
||||
$url = $this->encodeURLAttribute($url);
|
||||
|
||||
$result = "<a href=\"$url\"";
|
||||
if (isset($title)) {
|
||||
|
|
@ -2412,7 +2439,7 @@ abstract class _MarkdownExtra_TmpImpl extends \Michelf\Markdown {
|
|||
|
||||
$alt_text = $this->encodeAttribute($alt_text);
|
||||
if (isset($this->urls[$link_id])) {
|
||||
$url = $this->encodeAttribute($this->urls[$link_id]);
|
||||
$url = $this->encodeURLAttribute($this->urls[$link_id]);
|
||||
$result = "<img src=\"$url\" alt=\"$alt_text\"";
|
||||
if (isset($this->titles[$link_id])) {
|
||||
$title = $this->titles[$link_id];
|
||||
|
|
@ -2439,7 +2466,7 @@ abstract class _MarkdownExtra_TmpImpl extends \Michelf\Markdown {
|
|||
$attr = $this->doExtraAttributes("img", $dummy =& $matches[8]);
|
||||
|
||||
$alt_text = $this->encodeAttribute($alt_text);
|
||||
$url = $this->encodeAttribute($url);
|
||||
$url = $this->encodeURLAttribute($url);
|
||||
$result = "<img src=\"$url\" alt=\"$alt_text\"";
|
||||
if (isset($title)) {
|
||||
$title = $this->encodeAttribute($title);
|
||||
|
|
|
|||
Loading…
Reference in a new issue