commit bdcd08b467e5774e92f34252d6caee8db370d9c5
Author: Michel Fortin ` tags for something like this:
+
+ * this
+
+ * sub
+
+ that
+
+* Fixed some incorrect behaviour with emphasis. This will now work
+ as it should:
+
+ *test **thing***
+ **test *thing***
+ ***thing* test**
+ ***thing** test*
+
+ Name: __________
+ Address: _______
+
+* Correct a small bug in `_TokenizeHTML` where a Doctype declaration
+ was not seen as HTML.
+
+* Major rewrite of the WordPress integration code that should
+ correct many problems by preventing default WordPress filters from
+ tampering with Markdown-formatted text. More details here:
+ ` tag.
+
+* `_HashHTMLBlocks()` now tolerates trailing spaces and tabs following
+ HTML comments and ` ` tags.
+
+* Changed `_StripLinkDefinitions()` so that link definitions must
+ occur within three spaces of the left margin. Thus if you indent
+ a link definition by four spaces or a tab, it will now be a code
+ block.
+
+* You can now write empty links:
+
+ [like this]()
+
+ and they'll be turned into anchor tags with empty href attributes.
+ This should have worked before, but didn't.
+
+* `***this***` and `___this___` are now turned into
+
+ this
+
+ Instead of
+
+ this This is \"before\" [tab] and "after" a tab. ' => md5(' '), '
+
+Markdown can be configured to produce HTML-style tags; e.g.:
+
+
+
+To do this, you must edit the "$md_empty_element_suffix" variable
+below the "Global default settings" header at the start of the
+"markdown.php" file.
+
+
+### WordPress-Specific Settings ###
+
+By default, the Markdown plugin applies to both posts and comments on
+your WordPress weblog. To deactivate one or the other, edit the
+`$md_wp_posts` or `$md_wp_comments` variable under the "WordPress
+settings" header at the start of the "markdown.php" file.
+
+
+Bugs
+----
+
+To file bug reports please send email to:
+
+
+ which isn't valid.
+
+* Fixed problem for links defined with urls that include parens, e.g.:
+
+ [1]: http://sources.wikipedia.org/wiki/Middle_East_Policy_(Chomsky)
+
+ "Chomsky" was being erroneously treated as the URL's title.
+
+* Double quotes in the title of an inline link used to give strange
+ results (incorrectly made entities). Fixed.
+
+* Tabs are now correctly changed into spaces. Previously, only
+ the first tab was converted. In code blocks, the second one was too,
+ but was not always correctly aligned.
+
+* Fixed a bug where a tab character inserted after a quote on the same
+ line could add a slash before the quotes.
+
+ This is "before" [tab] and "after" a tab.
+
+ Previously gave this result:
+
+
+
+ This is achieved by backslash-escaping block markers before sending
+ text through the Markdown filter.
+
+ The improved Textile compatibility means that the Markdown syntax will now
+ be processed for comments in TextPattern (only for span elements due to
+ TextPattern using the lite mode for comments). Sadly, due to TextPattern
+ tag stripping, sample code in code spans and auto-links will be stripped
+ before the Markdown filter can see them. So I guess I should say it
+ half-works for comments in TextPattern.
+
+
+1.0.1 (16 Dec 2004):
+
+* Changed the syntax rules for code blocks and spans. Previously,
+ backslash escapes for special Markdown characters were processed
+ everywhere other than within inline HTML tags. Now, the contents of
+ code blocks and spans are no longer processed for backslash escapes.
+ This means that code blocks and spans are now treated literally,
+ with no special rules to worry about regarding backslashes.
+
+ **IMPORTANT**: This breaks the syntax from all previous versions of
+ Markdown. Code blocks and spans involving backslash characters will
+ now generate different output than before.
+
+ Implementation-wise, this change was made by moving the call to
+ `_EscapeSpecialChars()` from the top-level `Markdown()` function to
+ within `_RunSpanGamut()`.
+
+* Significant performance improvements in `_DoHeader`, `_Detab`
+ and `_TokenizeHTML`.
+
+* Added `>`, `+`, and `-` to the list of backslash-escapable
+ characters. These should have been done when these characters
+ were added as unordered list item markers.
+
+* Inline links using `<` and `>` URL delimiters weren't working:
+
+ like [this](
` tags.
+
+* Changed special case pattern for hashing `
` tags in
+ `_HashHTMLBlocks()` so that they must occur within three spaces
+ of left margin. (With 4 spaces or a tab, they should be
+ code blocks, but weren't before this fix.)
+
+* Auto-linked email address can now optionally contain
+ a 'mailto:' protocol. I.e. these are equivalent:
+
+
' => md5(''), '
'=> md5(''),
+ '') == 0) return $text; + return '
'.$text.'
'; + } + + function mdwp_strip_p($t) { return preg_replace('{?[pP]>}', '', $t); } + + function mdwp_hide_tags($text) { + global $markdown_hidden_tags; + return str_replace(array_keys($markdown_hidden_tags), + array_values($markdown_hidden_tags), $text); + } + function mdwp_show_tags($text) { + global $markdown_hidden_tags; + return str_replace(array_values($markdown_hidden_tags), + array_keys($markdown_hidden_tags), $text); + } +} + + +### bBlog Plugin Info ### + +function identify_modifier_markdown() { + return array( + 'name' => 'markdown', + 'type' => 'modifier', + 'nicename' => 'Markdown', + 'description' => 'A text-to-HTML conversion tool for web writers', + 'authors' => 'Michel Fortin and John Gruber', + 'licence' => 'GPL', + 'version' => MARKDOWN_VERSION, + 'help' => 'Markdown syntax allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by John Gruber. More...' + ); +} + + +### Smarty Modifier Interface ### + +function smarty_modifier_markdown($text) { + return Markdown($text); +} + + +### Textile Compatibility Mode ### + +# Rename this file to "classTextile.php" and it can replace Textile everywhere. + +if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) { + # Try to include PHP SmartyPants. Should be in the same directory. + @include_once 'smartypants.php'; + # Fake Textile class. It calls Markdown instead. + class Textile { + function TextileThis($text, $lite='', $encode='') { + if ($lite == '' && $encode == '') $text = Markdown($text); + if (function_exists('SmartyPants')) $text = SmartyPants($text); + return $text; + } + # Workaround to ensure compatibility with TextPattern 4.0.3. + function blockLite($text) { return $text; } + } +} + + + +# +# Markdown Parser Class +# + +class Markdown_Parser { + + # Regex to match balanced [brackets]. + # Needed to insert a maximum bracked depth while converting to PHP. 
+ var $nested_brackets_depth = 6; + var $nested_brackets; + + # Table of hash values for escaped characters: + var $escape_chars = '\`*_{}[]()>#+-.!'; + var $escape_table = array(); + var $backslash_escape_table = array(); + + # Change to ">" for HTML output. + var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX; + var $tab_width = MARKDOWN_TAB_WIDTH; + + + function Markdown_Parser() { + # + # Constructor function. Initialize appropriate member variables. + # + $this->nested_brackets = + str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth). + str_repeat('\])*', $this->nested_brackets_depth); + + # Create an identical table but for escaped characters. + foreach (preg_split('/(?!^|$)/', $this->escape_chars) as $char) { + $hash = md5($char); + $this->escape_table[$char] = $hash; + $this->backslash_escape_table["\\$char"] = $hash; + } + } + + + # Internal hashes used during transformation. + var $urls = array(); + var $titles = array(); + var $html_blocks = array(); + + + function transform($text) { + # + # Main function. The order in which other subs are called here is + # essential. Link and image substitutions need to happen before + # _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in the + # ands around + # "paragraphs" that are wrapped in non-block-level tags, such as anchors, + # phrase emphasis, and spans. The list of tags we're looking for is + # hard-coded: + $block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|'. + 'script|noscript|form|fieldset|iframe|math|ins|del'; + $block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|'. + 'script|noscript|form|fieldset|iframe|math'; + + # First, look for nested blocks, e.g.: + #
tags around block-level tags.
+ $text = $this->hashHTMLBlocks($text);
+ $text = $this->formParagraphs($text);
+
+ return $text;
+ }
+
+
+ function runSpanGamut($text) {
+ #
+ # These are all the transformations that occur *within* block-level
+ # tags like paragraphs, headers, and list items.
+ #
+ $text = $this->doCodeSpans($text);
+
+ $text = $this->escapeSpecialChars($text);
+
+ # Process anchor and image tags. Images must come first,
+ # because ![foo][f] looks like an anchor.
+ $text = $this->doImages($text);
+ $text = $this->doAnchors($text);
+
+ # Make links out of things like ` Just type tags
+ #
+ # Strip leading and trailing lines:
+ $text = preg_replace(array('/\A\n+/', '/\n+\z/'), '', $text);
+
+ $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
+
+ #
+ # Wrap tags.
+ #
+ foreach ($grafs as $key => $value) {
+ if (!isset( $this->html_blocks[$value] )) {
+ $value = $this->runSpanGamut($value);
+ $value = preg_replace('/^([ \t]*)/', ' ', $value);
+ $value .= "
empty_element_suffix\n", $text);
+
+ return $text;
+ }
+
+
+ function escapeSpecialChars($text) {
+     #
+     # Split $text into tokens with tokenizeHTML() and rebuild it piece
+     # by piece:
+     #   * 'tag' tokens get every "*" and "_" replaced by the matching
+     #     escape_table hash;
+     #   * every other token is passed through encodeBackslashEscapes().
+     # Returns the rebuilt string.
+     #
+     $tokens = $this->tokenizeHTML($text);
+
+     $text = ''; # rebuild $text from the tokens
+     # $in_pre = 0; # Keep track of when we're inside <pre> or <code> tags.
+     # $tags_to_skip = "!<(/?)(?:pre|code|kbd|script|math)[\s>]!";
+
+     foreach ($tokens as $cur_token) {
+         if ($cur_token[0] == 'tag') {
+             # Within tags, encode * and _ so they don't conflict
+             # with their use in Markdown for italics and strong.
+             # We're replacing each such character with its
+             # corresponding MD5 checksum value; this is likely
+             # overkill, but it should prevent us from colliding
+             # with the escape values by accident.
+             $cur_token[1] = str_replace(array('*', '_'),
+                 array($this->escape_table['*'], $this->escape_table['_']),
+                 $cur_token[1]);
+             $text .= $cur_token[1];
+         } else {
+             $t = $cur_token[1];
+             $t = $this->encodeBackslashEscapes($t);
+             $text .= $t;
+         }
+     }
+     return $text;
+ }
+
+
+ function doAnchors($text) {
+     #
+     # Turn Markdown link shortcuts into XHTML <a> tags.
+     #
+     # Two passes are made over $text, each handing its matches to a
+     # callback method that builds the replacement: reference-style links
+     # first, then inline-style links. Returns the transformed text.
+     #
+     #
+     # First, handle reference-style links: [link text] [id]
+     #
+     $text = preg_replace_callback("{
+         (                               # wrap whole match in $1
+           \\[
+             ($this->nested_brackets)    # link text = $2
+           \\]
+
+           [ ]?                          # one optional space
+           (?:\\n[ ]*)?                  # one optional newline followed by spaces
+
+           \\[
+             (.*?)                       # id = $3
+           \\]
+         )
+         }xs",
+         array(&$this, '_doAnchors_reference_callback'), $text);
+
+     #
+     # Next, inline-style links: [link text](url "optional title")
+     #
+     # The URL may optionally be wrapped in angle brackets; the leading
+     # "<?" keeps an opening "<" out of the captured href.
+     #
+     $text = preg_replace_callback("{
+         (                               # wrap whole match in $1
+           \\[
+             ($this->nested_brackets)    # link text = $2
+           \\]
+           \\(                           # literal paren
+             [ \\t]*
+             <?(.*?)>?                   # href = $3
+             [ \\t]*
+             (                           # $4
+               (['\"])                   # quote char = $5
+               (.*?)                     # Title = $6
+               \\5                       # matching quote
+             )?                          # title is optional
+           \\)
+         )
+         }xs",
+         array(&$this, '_doAnchors_inline_callback'), $text);
+
+     return $text;
+ }
+ function _doAnchors_reference_callback($matches) {
+     #
+     # preg_replace_callback() handler for reference-style links.
+     # $matches[1] is the whole match, $matches[2] the link text and
+     # $matches[3] the link id. Returns an <a> tag when the id is present
+     # in $this->urls, otherwise the untouched match.
+     #
+     $whole_match = $matches[1];
+     $link_text = $matches[2];
+     $link_id = strtolower($matches[3]);
+
+     if ($link_id == "") {
+         $link_id = strtolower($link_text); # for shortcut links like [this][].
+     }
+
+     if (isset($this->urls[$link_id])) {
+         $url = $this->urls[$link_id];
+         # We've got to encode these to avoid conflicting with italics/bold.
+         $url = str_replace(array('*', '_'),
+             array($this->escape_table['*'], $this->escape_table['_']),
+             $url);
+         $result = "<a href=\"$url\"";
+         if ( isset( $this->titles[$link_id] ) ) {
+             $title = $this->titles[$link_id];
+             $title = str_replace(array('*', '_'),
+                 array($this->escape_table['*'],
+                     $this->escape_table['_']), $title);
+             $result .= " title=\"$title\"";
+         }
+         $result .= ">$link_text</a>";
+     }
+     else {
+         # Unknown link id: leave the original text alone.
+         $result = $whole_match;
+     }
+     return $result;
+ }
+ function _doAnchors_inline_callback($matches) {
+     #
+     # preg_replace_callback() handler for inline-style links.
+     # $matches[2] is the link text, $matches[3] the URL and $matches[6]
+     # the optional title. Returns the generated <a> tag.
+     #
+     $whole_match = $matches[1];
+     $link_text = $matches[2];
+     $url = $matches[3];
+     # Bound by reference so that a missing title group leaves $title
+     # unset (null) rather than raising an undefined-index notice.
+     $title =& $matches[6];
+
+     # We've got to encode these to avoid conflicting with italics/bold.
+     $url = str_replace(array('*', '_'),
+         array($this->escape_table['*'], $this->escape_table['_']),
+         $url);
+     $result = "<a href=\"$url\"";
+     if (isset($title)) {
+         # Double quotes would terminate the title attribute early.
+         $title = str_replace('"', '&quot;', $title);
+         $title = str_replace(array('*', '_'),
+             array($this->escape_table['*'], $this->escape_table['_']),
+             $title);
+         $result .= " title=\"$title\"";
+     }
+
+     $result .= ">$link_text</a>";
+
+     return $result;
+ }
+
+
+ function doImages($text) {
+     #
+     # Turn Markdown image shortcuts into <img> tags.
+     #
+     # Two passes are made over $text, each handing its matches to a
+     # callback method that builds the replacement: reference-style images
+     # first, then inline images. Returns the transformed text.
+     #
+     #
+     # First, handle reference-style labeled images: ![alt text][id]
+     #
+     $text = preg_replace_callback('{
+         (                                   # wrap whole match in $1
+           !\[
+             ('.$this->nested_brackets.')    # alt text = $2
+           \]
+
+           [ ]?                              # one optional space
+           (?:\n[ ]*)?                       # one optional newline followed by spaces
+
+           \[
+             (.*?)                           # id = $3
+           \]
+
+         )
+         }xs',
+         array(&$this, '_doImages_reference_callback'), $text);
+
+     #
+     # Next, handle inline images: ![alt text](url "optional title")
+     # Don't forget: encode * and _
+     #
+     # The URL may optionally be wrapped in angle brackets; the leading
+     # "<?" keeps an opening "<" out of the captured src.
+     #
+     $text = preg_replace_callback('{
+         (                                   # wrap whole match in $1
+           !\[
+             ('.$this->nested_brackets.')    # alt text = $2
+           \]
+           \(                                # literal paren
+             [ \t]*
+             <?(\S+?)>?                      # src url = $3
+             [ \t]*
+             (                               # $4
+               ([\'"])                       # quote char = $5
+               (.*?)                         # title = $6
+               \5                            # matching quote
+               [ \t]*
+             )?                              # title is optional
+           \)
+         )
+         }xs',
+         array(&$this, '_doImages_inline_callback'), $text);
+
+     return $text;
+ }
+ function _doImages_reference_callback($matches) {
+     #
+     # preg_replace_callback() handler for reference-style images.
+     # $matches[1] is the whole match, $matches[2] the alt text and
+     # $matches[3] the link id. Returns an <img> tag when the id is present
+     # in $this->urls, otherwise the untouched match.
+     #
+     $whole_match = $matches[1];
+     $alt_text = $matches[2];
+     $link_id = strtolower($matches[3]);
+
+     if ($link_id == "") {
+         $link_id = strtolower($alt_text); # for shortcut links like ![this][].
+     }
+
+     # Double quotes would terminate the alt attribute early.
+     $alt_text = str_replace('"', '&quot;', $alt_text);
+     if (isset($this->urls[$link_id])) {
+         $url = $this->urls[$link_id];
+         # We've got to encode these to avoid conflicting with italics/bold.
+         $url = str_replace(array('*', '_'),
+             array($this->escape_table['*'], $this->escape_table['_']),
+             $url);
+         $result = "<img src=\"$url\" alt=\"$alt_text\"";
+         if (isset($this->titles[$link_id])) {
+             $title = $this->titles[$link_id];
+             $title = str_replace(array('*', '_'),
+                 array($this->escape_table['*'],
+                     $this->escape_table['_']), $title);
+             $result .= " title=\"$title\"";
+         }
+         $result .= $this->empty_element_suffix;
+     }
+     else {
+         # If there's no such link ID, leave intact:
+         $result = $whole_match;
+     }
+
+     return $result;
+ }
+ function _doImages_inline_callback($matches) {
+     #
+     # preg_replace_callback() handler for inline images.
+     # $matches[2] is the alt text, $matches[3] the URL and $matches[6]
+     # the optional title. Returns the generated <img> tag.
+     #
+     $whole_match = $matches[1];
+     $alt_text = $matches[2];
+     $url = $matches[3];
+     $title = '';
+     if (isset($matches[6])) {
+         $title = $matches[6];
+     }
+
+     # Double quotes would terminate the attribute values early.
+     $alt_text = str_replace('"', '&quot;', $alt_text);
+     $title = str_replace('"', '&quot;', $title);
+     # We've got to encode these to avoid conflicting with italics/bold.
+     $url = str_replace(array('*', '_'),
+         array($this->escape_table['*'], $this->escape_table['_']),
+         $url);
+     $result = "<img src=\"$url\" alt=\"$alt_text\"";
+     # NOTE(review): $title defaults to '' above, so this guard always
+     # passes and a title attribute (possibly empty) is always emitted --
+     # confirm that is the intended output.
+     if (isset($title)) {
+         $title = str_replace(array('*', '_'),
+             array($this->escape_table['*'], $this->escape_table['_']),
+             $title);
+         $result .= " title=\"$title\""; # $title already quoted
+     }
+     $result .= $this->empty_element_suffix;
+
+     return $result;
+ }
+
+
+ function doHeaders($text) {
+     #
+     # Find both header syntaxes in $text and let the matching
+     # _doHeaders_callback_* method build each replacement.
+     # Returns the transformed text.
+     #
+     # Setext-style headers are handled first, "=" underlines before "-":
+     #     Header 1
+     #     ========
+     #
+     #     Header 2
+     #     --------
+     #
+     $setext_rules = array(
+         '{ ^(.+)[ \t]*\n=+[ \t]*\n+ }mx' => '_doHeaders_callback_setext_h1',
+         '{ ^(.+)[ \t]*\n-+[ \t]*\n+ }mx' => '_doHeaders_callback_setext_h2'
+     );
+     foreach ($setext_rules as $pattern => $method) {
+         $text = preg_replace_callback($pattern, array(&$this, $method), $text);
+     }
+
+     # Then atx-style headers:
+     #     # Header 1
+     #     ## Header 2
+     #     ## Header 2 with closing hashes ##
+     #     ...
+     #     ###### Header 6
+     #
+     $atx_pattern = '{
+ ^(\#{1,6}) # $1 = string of #\'s
+ [ \t]*
+ (.+?) # $2 = Header text
+ [ \t]*
+ \#* # optional closing #\'s (not counted)
+ \n+
+ }xm';
+     $atx_handler = array(&$this, '_doHeaders_callback_atx');
+
+     return preg_replace_callback($atx_pattern, $atx_handler, $text);
+ }
+ function _doHeaders_callback_setext_h1($matches) {
+     # Wrap the span-processed header text ($matches[1]) in a level-1 header.
+     return "<h1>".$this->runSpanGamut($matches[1])."</h1>\n\n";
+ }
+ function _doHeaders_callback_setext_h2($matches) {
+     # Wrap the span-processed header text ($matches[1]) in a level-2 header.
+     return "<h2>".$this->runSpanGamut($matches[1])."</h2>\n\n";
+ }
+ function _doHeaders_callback_atx($matches) {
+ $level = strlen($matches[1]);
+ return "` blocks.
+ #
+ $text = preg_replace_callback('{
+ (?:\n\n|\A)
+ ( # $1 = the code block -- one or more lines, starting with a space/tab
+ (?:
+ (?:[ ]{'.$this->tab_width.'} | \t) # Lines must start with a tab or a tab-width of spaces
+ .*\n+
+ )+
+ )
+ ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
+ }xm',
+ array(&$this, '_doCodeBlocks_callback'), $text);
+
+ return $text;
+ }
+ function _doCodeBlocks_callback($matches) {
+     #
+     # preg_replace_callback() handler for code blocks. $matches[1] is the
+     # code block text; it is outdented, encoded with encodeCode(), trimmed
+     # and wrapped in <pre><code> tags.
+     #
+     $codeblock = $matches[1];
+
+     $codeblock = $this->encodeCode($this->outdent($codeblock));
+     // $codeblock = $this->detab($codeblock);
+     # trim leading newlines and trailing whitespace
+     $codeblock = preg_replace(array('/\A\n+/', '/\s+\z/'), '', $codeblock);
+
+     $result = "\n\n<pre><code>" . $codeblock . "\n</code></pre>\n\n";
+
+     return $result;
+ }
+
+
+ function doCodeSpans($text) {
+ #
+ # * Backtick quotes are used for " . $codeblock . "\n spans.
+ #
+ # * You can use multiple backticks as the delimiters if you want to
+ # include literal backticks in the code span. So, this input:
+ #
+ # Just type ``foo `bar` baz`` at the prompt.
+ #
+ # Will translate to:
+ #
+ # foo `bar` baz at the prompt.`bar` ...
+ #
+ $text = preg_replace_callback('@
+ (?encodeCode($c);
+ return "$c";
+ }
+
+
+ function encodeCode($_) {
+     #
+     # Encode/escape certain characters inside Markdown code runs.
+     # The point is that in code, these characters are literals,
+     # and lose their special Markdown meanings.
+     #
+     # Returns $_ with "&", "<" and ">" turned into HTML entities and
+     # every key of $this->escape_table replaced by its hash value.
+     #
+     # Encode all ampersands; HTML entities are not
+     # entities within a Markdown code span.
+     # This must run first so the entities produced below are not
+     # themselves re-encoded.
+     $_ = str_replace('&', '&amp;', $_);
+
+     # Do the angle bracket song and dance:
+     $_ = str_replace(array('<', '>'),
+         array('&lt;', '&gt;'), $_);
+
+     # Now, escape characters that are magic in Markdown:
+     $_ = str_replace(array_keys($this->escape_table),
+         array_values($this->escape_table), $_);
+
+     return $_;
+ }
+
+
+ function doItalicsAndBold($text) {
+ # must go first:
+ $text = preg_replace('{
+ ( # $1: Marker
+ (?\2', $text);
+ # Then :
+ $text = preg_replace(
+ '{ ( (?\2', $text);
+
+ return $text;
+ }
+
+
+ function doBlockQuotes($text) {
+     #
+     # Find every run of ">"-quoted lines in $text and let
+     # _doBlockQuotes_callback() build the replacement for each one.
+     # Returns the transformed text.
+     #
+     $quote_run = '/
+ ( # Wrap whole match in $1
+ (
+ ^[ \t]*>[ \t]? # ">" at the start of a line
+ .+\n # rest of the first line
+ (.+\n)* # subsequent consecutive lines
+ \n* # blanks
+ )+
+ )
+ /xm';
+     $handler = array(&$this, '_doBlockQuotes_callback');
+
+     return preg_replace_callback($quote_run, $handler, $text);
+ }
+ function _doBlockQuotes_callback($matches) {
+     #
+     # preg_replace_callback() handler for block quotes. $matches[1] is the
+     # quoted run; one level of ">" is removed, the remainder is run back
+     # through runBlockGamut() and the result is wrapped in <blockquote>.
+     #
+     $bq = $matches[1];
+     # trim one level of quoting - trim whitespace-only lines
+     $bq = preg_replace(array('/^[ \t]*>[ \t]?/m', '/^[ \t]+$/m'), '', $bq);
+     $bq = $this->runBlockGamut($bq); # recurse
+
+     $bq = preg_replace('/^/m', " ", $bq);
+     # These leading spaces screw with <pre> content, so we need to fix that:
+     $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
+         array(&$this, '_doBlockQuotes_callback2'), $bq);
+
+     return "<blockquote>\n$bq\n</blockquote>\n\n";
+ }
+ function _doBlockQuotes_callback2($matches) {
+     # Drop the leading space from the start of every line of the
+     # captured text ($matches[1]) and return the result.
+     return preg_replace('/^ /m', '', $matches[1]);
+ }
+
+
+ function formParagraphs($text) {
+ #
+ # Params:
+ # $text - string to process with html as well).
+
+For more information about Markdown's syntax, see:
+
+