diff --git a/Michelf/Markdown.php b/Michelf/Markdown.php
index 0d37012..c5245fd 100644
--- a/Michelf/Markdown.php
+++ b/Michelf/Markdown.php
@@ -59,6 +59,9 @@ class Markdown implements MarkdownInterface {
public $predef_urls = array();
public $predef_titles = array();
+ # Optional filter function for URLs
+ public $url_filter_func = null;
+
### Parser Implementation ###
@@ -593,7 +596,7 @@ class Markdown implements MarkdownInterface {
if (isset($this->urls[$link_id])) {
$url = $this->urls[$link_id];
- $url = $this->encodeAttribute($url);
+ $url = $this->encodeURLAttribute($url);
$result = "titles[$link_id] ) ) {
@@ -623,7 +626,7 @@ class Markdown implements MarkdownInterface {
if ($unhashed != $url)
$url = preg_replace('/^<(.*)>$/', '\1', $unhashed);
- $url = $this->encodeAttribute($url);
+ $url = $this->encodeURLAttribute($url);
$result = "encodeAttribute($alt_text);
if (isset($this->urls[$link_id])) {
- $url = $this->encodeAttribute($this->urls[$link_id]);
+ $url = $this->encodeURLAttribute($this->urls[$link_id]);
$result = "
titles[$link_id])) {
$title = $this->titles[$link_id];
@@ -728,7 +731,7 @@ class Markdown implements MarkdownInterface {
$title =& $matches[7];
$alt_text = $this->encodeAttribute($alt_text);
- $url = $this->encodeAttribute($url);
+ $url = $this->encodeURLAttribute($url);
$result = "
encodeAttribute($title);
@@ -1260,6 +1263,33 @@ class Markdown implements MarkdownInterface {
$text = str_replace('"', '"', $text);
return $text;
}
+
+
+ protected function encodeURLAttribute($url, &$text = null) {
+ 	#
+ 	# Encode a URL for a double-quoted HTML attribute, after passing it
+ 	# through $this->url_filter_func when that is callable. A filter that
+ 	# is set but not callable is skipped: call_user_func() would error
+ 	# on it. $text receives the textual form of the URL: the encoded URL
+ 	# with a leading "mailto:" or "tel:" removed. Returns the encoded URL.
+ 	# This function is *not* suitable for attributes enclosed in single quotes.
+ 	#
+ 	if (is_callable($this->url_filter_func)) {
+ 		$url = call_user_func($this->url_filter_func, $url);
+ 	}
+
+ 	if (preg_match('{^mailto:}i', $url)) {
+ 		# 7 is strlen("mailto:"), the head kept out of $text.
+ 		$url = $this->encodeEntityObfuscatedAttribute($url, $text, 7);
+ 	} else if (preg_match('{^tel:}i', $url)) {
+ 		$url = $this->encodeAttribute($url);
+ 		$text = substr($url, 4); # 4 is strlen("tel:")
+ 	} else {
+ 		$url = $this->encodeAttribute($url);
+ 		$text = $url;
+ 	}
+ 	return $url;
+ }
protected function encodeAmpsAndAngles($text) {
@@ -1284,7 +1314,7 @@ class Markdown implements MarkdownInterface {
protected function doAutoLinks($text) {
- $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}i',
+ $text = preg_replace_callback('{<((https?|ftp|dict|tel):[^\'">\s]+)>}i',
array($this, '_doAutoLinks_url_callback'), $text);
# Email addresses:
@@ -1307,48 +1337,46 @@ class Markdown implements MarkdownInterface {
>
}xi',
array($this, '_doAutoLinks_email_callback'), $text);
- $text = preg_replace_callback('{<(tel:([^\'">\s]+))>}i',array($this, '_doAutoLinks_tel_callback'), $text);
return $text;
}
- protected function _doAutoLinks_tel_callback($matches) {
- $url = $this->encodeAttribute($matches[1]);
- $tel = $this->encodeAttribute($matches[2]);
- $link = "$tel";
- return $this->hashPart($link);
- }
protected function _doAutoLinks_url_callback($matches) {
- $url = $this->encodeAttribute($matches[1]);
- $link = "$url";
+ $url = $this->encodeURLAttribute($matches[1], $text);
+ $link = "$text";
return $this->hashPart($link);
}
protected function _doAutoLinks_email_callback($matches) {
- $address = $matches[1];
- $link = $this->encodeEmailAddress($address);
+ $addr = $matches[1];
+ $url = $this->encodeURLAttribute("mailto:$addr", $text);
+ $link = "$text";
return $this->hashPart($link);
}
- protected function encodeEmailAddress($addr) {
+ protected function encodeEntityObfuscatedAttribute($text, &$tail = null, $head_length = 0) {
#
- # Input: an email address, e.g. "foo@example.com"
+ # Input: some text to obfuscate, e.g. "mailto:foo@example.com"
#
- # Output: the email address as a mailto link, with each character
- # of the address encoded as either a decimal or hex entity, in
- # the hopes of foiling most address harvesting spam bots. E.g.:
+ # Output: the same text but with most characters encoded as either a
+ # decimal or hex entity, in the hopes of foiling most address
+ # harvesting spam bots. E.g.:
#
- # foo@exampl
- # e.com
+ # m
+ #
+ # Note: the additional output $tail is assigned the same value as the
# output, minus the number of characters specified by $head_length.
#
# Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
- # With some optimizations by Milian Wolff.
+ # With some optimizations by Milian Wolff. Forced encoding of HTML
+ # attribute special characters by Allan Odgaard.
#
- $addr = "mailto:" . $addr;
- $chars = preg_split('/(? $char) {
$ord = ord($char);
# Ignore non-ascii chars.
@@ -1356,18 +1384,17 @@ class Markdown implements MarkdownInterface {
$r = ($seed * (1 + $key)) % 100; # Pseudo-random function.
# roughly 10% raw, 45% hex, 45% dec
# '@' *must* be encoded. I insist.
- # '"' has to be encoded inside the attribute
- if ($r > 90 && $char != '@' && $char != '"') /* do nothing */;
+ # '"' and '>' have to be encoded inside the attribute
+ if ($r > 90 && strpos('@"&>', $char) === false) /* do nothing */;
else if ($r < 45) $chars[$key] = ''.dechex($ord).';';
else $chars[$key] = ''.$ord.';';
}
}
-
- $addr = implode('', $chars);
- $text = implode('', array_slice($chars, 7)); # text without `mailto:`
- $addr = "$text";
- return $addr;
+ $text = implode('', $chars);
+ $tail = $head_length ? implode('', array_slice($chars, $head_length)) : $text;
+
+ return $text;
}
@@ -2296,7 +2323,7 @@ abstract class _MarkdownExtra_TmpImpl extends \Michelf\Markdown {
if (isset($this->urls[$link_id])) {
$url = $this->urls[$link_id];
- $url = $this->encodeAttribute($url);
+ $url = $this->encodeURLAttribute($url);
$result = "titles[$link_id] ) ) {
@@ -2329,7 +2356,7 @@ abstract class _MarkdownExtra_TmpImpl extends \Michelf\Markdown {
if ($unhashed != $url)
$url = preg_replace('/^<(.*)>$/', '\1', $unhashed);
- $url = $this->encodeAttribute($url);
+ $url = $this->encodeURLAttribute($url);
$result = "encodeAttribute($alt_text);
if (isset($this->urls[$link_id])) {
- $url = $this->encodeAttribute($this->urls[$link_id]);
+ $url = $this->encodeURLAttribute($this->urls[$link_id]);
$result = "
titles[$link_id])) {
$title = $this->titles[$link_id];
@@ -2439,7 +2466,7 @@ abstract class _MarkdownExtra_TmpImpl extends \Michelf\Markdown {
$attr = $this->doExtraAttributes("img", $dummy =& $matches[8]);
$alt_text = $this->encodeAttribute($alt_text);
- $url = $this->encodeAttribute($url);
+ $url = $this->encodeURLAttribute($url);
$result = "
encodeAttribute($title);