From e2b2ddc026c877b5e4979628ff77355592aecaea Mon Sep 17 00:00:00 2001 From: Michel Fortin Date: Tue, 28 Aug 2007 17:15:30 -0400 Subject: [PATCH] Fixed bug with backslash before code span, fixed a speed issue with "unhash", and various other speed improvements. --- PHP Markdown Readme.text | 13 +- markdown.php | 526 ++++++++++++++++----------------------- 2 files changed, 224 insertions(+), 315 deletions(-) diff --git a/PHP Markdown Readme.text b/PHP Markdown Readme.text index d5d1d1a..5a56fa2 100644 --- a/PHP Markdown Readme.text +++ b/PHP Markdown Readme.text @@ -1,7 +1,7 @@ PHP Markdown ============ -Version 1.0.1h - Fri 3 Aug 2007 +Version 1.0.1i - Tue 28 Aug 2007 by Michel Fortin @@ -203,6 +203,17 @@ expected; (3) the output PHP Markdown actually produced. Version History --------------- +1.0.1i (28 Aug 2007): + +* Fixed a problem where an escaped backslash before a code span + would prevent the code span from being created. This should now + work as expected: + + Litteral backslash: \\`code span` + +* Overall speed improvements, especially with long documents. + + 1.0.1h (3 Aug 2007): * Added two properties (`no_markup` and `no_entities`) to the parser diff --git a/markdown.php b/markdown.php index a699f1d..deacba6 100644 --- a/markdown.php +++ b/markdown.php @@ -12,7 +12,7 @@ # -define( 'MARKDOWN_VERSION', "1.0.1h" ); # Fri 3 Aug 2007 +define( 'MARKDOWN_VERSION', "1.0.1i" ); # Tue 28 Aug 2007 # @@ -62,7 +62,7 @@ function Markdown($text) { Plugin Name: Markdown Plugin URI: http://www.michelf.com/projects/php-markdown/ Description: Markdown syntax allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by John Gruber. More... -Version: 1.0.1h +Version: 1.0.1i Author: Michel Fortin Author URI: http://www.michelf.com/ */ @@ -200,8 +200,6 @@ class Markdown_Parser { # Table of hash values for escaped characters: var $escape_chars = '\`*_{}[]()>#+-.!'; -// var $escape_table = array(); - var $backslash_escape_table = array(); # Change to ">" for HTML output. var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX; @@ -226,13 +224,6 @@ class Markdown_Parser { str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth). str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth); - # Create an identical table but for escaped characters. - foreach (preg_split('/(?!^|$)/', $this->escape_chars) as $char) { - $entity = "&#". ord($char). ";"; -// $this->escape_table[$char] = $entity; - $this->backslash_escape_table["\\$char"] = $entity; - } - # Sort document, block, and span gamut in ascendent priority order. asort($this->document_gamut); asort($this->block_gamut); @@ -243,8 +234,7 @@ class Markdown_Parser { # Internal hashes used during transformation. var $urls = array(); var $titles = array(); - var $html_blocks = array(); - var $html_hashes = array(); # Contains both blocks and span hashes. + var $html_hashes = array(); # Status flag to avoid invalid nesting. var $in_anchor = false; @@ -263,12 +253,11 @@ class Markdown_Parser { # articles): $this->urls = array(); $this->titles = array(); - $this->html_blocks = array(); $this->html_hashes = array(); # Standardize line endings: # DOS to Unix and Mac to Unix - $text = str_replace(array("\r\n", "\r"), "\n", $text); + $text = preg_replace('{\r\n?}', "\n", $text); # Make sure $text ends with a couple of newlines: $text .= "\n\n"; @@ -342,7 +331,7 @@ class Markdown_Parser { function hashHTMLBlocks($text) { if ($this->no_markup) return $text; - + $less_than_tab = $this->tab_width - 1; # Hashify HTML blocks: @@ -351,8 +340,14 @@ class Markdown_Parser { # "paragraphs" that are wrapped in non-block-level tags, such as anchors, # phrase emphasis, and spans. The list of tags we're looking for is # hard-coded: - $block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'. - 'script|noscript|form|fieldset|iframe|math|ins|del'; + # + # * List "a" is made of tags which can be both inline or block-level. + # These will be treated block-level when the start tag is alone on + # its line, otherwise they're not matched here and will be taken as + # inline later. + # * List "b" is made of tags which are always block-level; + # + $block_tags_a = 'ins|del'; $block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'. 'script|noscript|form|fieldset|iframe|math'; @@ -379,7 +374,7 @@ class Markdown_Parser { | <\2 # nested opening tag '.$attr.' # attributes - (?: + (?> /> | >', $nested_tags_level). # end of opening tag @@ -392,6 +387,7 @@ class Markdown_Parser { ) )*', $nested_tags_level); + $content2 = str_replace('\2', '\3', $content); # First, look for nested blocks, e.g.: #
@@ -404,46 +400,38 @@ class Markdown_Parser { # the inner nested divs must be indented. # We need to do this before the next, more liberal match, because the next # match will start at the first `
` and stop at the first `
`. - $text = preg_replace_callback('{ - ( # save in $1 - ^ # start of line (with /m) - <('.$block_tags_a.')# start tag = $2 - '.$attr.'>\n # attributes followed by > and \n - '.$content.' # content, support nesting - # the matching end tag - [ ]* # trailing spaces/tabs - (?=\n+|\Z) # followed by a newline or end of document - ) - }xmi', - array(&$this, '_hashHTMLBlocks_callback'), - $text); + $text = preg_replace_callback('{(?> + (?> + (?<=\n\n) # Starting after a blank line + | # or + \A\n? # the beginning of the doc + ) + ( # save in $1 - # - # Match from `\n` to `\n`, handling nested tags in between. - # - $text = preg_replace_callback('{ - ( # save in $1 - ^ # start of line (with /m) + # Match from `\n` to `\n`, handling nested tags + # in between. + + [ ]{0,'.$less_than_tab.'} <('.$block_tags_b.')# start tag = $2 - '.$attr.'> # attributes followed by > + '.$attr.'> # attributes followed by > and \n '.$content.' # content, support nesting # the matching end tag [ ]* # trailing spaces/tabs (?=\n+|\Z) # followed by a newline or end of document - ) - }xmi', - array(&$this, '_hashHTMLBlocks_callback'), - $text); - # Special case just for
. It was easier to make a special case than - # to make the other regex more complicated. - $text = preg_replace_callback('{ - (?: - (?<=\n\n) # Starting after a blank line - | # or - \A\n? # the beginning of the doc - ) - ( # save in $1 + | # Special version for tags of group a. + + [ ]{0,'.$less_than_tab.'} + <('.$block_tags_a.')# start tag = $3 + '.$attr.'>[ ]*\n # attributes followed by > + '.$content2.' # content, support nesting + # the matching end tag + [ ]* # trailing spaces/tabs + (?=\n+|\Z) # followed by a newline or end of document + + | # Special case just for
. It was easier to make a special + # case than to make the other regex more complicated. + [ ]{0,'.$less_than_tab.'} <(hr) # start tag = $2 \b # word break @@ -451,38 +439,18 @@ class Markdown_Parser { /?> # the matching end tag [ ]* (?=\n{2,}|\Z) # followed by a blank line or end of document - ) - }xi', - array(&$this, '_hashHTMLBlocks_callback'), - $text); - - # Special case for standalone HTML comments: - $text = preg_replace_callback('{ - (?: - (?<=\n\n) # Starting after a blank line - | # or - \A\n? # the beginning of the doc - ) - ( # save in $1 + + | # Special case for standalone HTML comments: + [ ]{0,'.$less_than_tab.'} (?s: ) [ ]* (?=\n{2,}|\Z) # followed by a blank line or end of document - ) - }x', - array(&$this, '_hashHTMLBlocks_callback'), - $text); - - # PHP and ASP-style processor instructions (hashBlock($text); return "\n\n$key\n\n"; } - - - function hashBlock($text) { + + + function hashPart($text, $boundary = 'X') { # - # Called whenever a tag must be hashed when a function insert a block-level - # tag in $text, it pass through this function and is automaticaly escaped, - # which remove the need to call _HashHTMLBlocks at every step. + # Called whenever a tag must be hashed when a function insert an atomic + # element in the text stream. Passing $text to through this function gives + # a unique text-token which will be reverted back when calling unhash. + # + # The $boundary argument specify what character should be used to surround + # the token. By convension, "B" is used for block elements that needs not + # to be wrapped into paragraph tags at the end, ":" is used for elements + # that are word separators and "S" is used for general span-level elements. # # Swap back any tag hash found in $text so we do not have to `unhash` # multiple times at the end. $text = $this->unhash($text); # Then hash the block. - $key = "B\x1A". md5($text); + static $i = 0; + $key = "$boundary\x1A" . ++$i . $boundary; $this->html_hashes[$key] = $text; - $this->html_blocks[$key] = $text; return $key; # String that will replace the tag. } - function hashSpan($text, $word_separator = false) { + function hashBlock($text) { # - # Called whenever a tag must be hashed when a function insert a span-level - # element in $text, it pass through this function and is automaticaly - # escaped, blocking invalid nested overlap. If optional argument - # $word_separator is true, surround the hash value by spaces. + # Shortcut function for hashPart with block-level boundaries. # - # Swap back any tag hash found in $text so we do not have to `unhash` - # multiple times at the end. - $text = $this->unhash($text); - - # Then hash the span. - $key = "S\x1A". md5($text); - if ($word_separator) $key = ":$key:"; - - $this->html_hashes[$key] = $text; - return $key; # String that will replace the span tag. + return $this->hashPart($text, 'B'); } @@ -590,9 +552,16 @@ class Markdown_Parser { function doHorizontalRules($text) { # Do Horizontal Rules: return preg_replace( - array('{^[ ]{0,2}([ ]?\*[ ]?){3,}[ ]*$}mx', - '{^[ ]{0,2}([ ]? -[ ]?){3,}[ ]*$}mx', - '{^[ ]{0,2}([ ]? _[ ]?){3,}[ ]*$}mx'), + '{ + ^[ ]{0,3} # Leading space + ([*-_]) # $1: First marker + (?> # Repeated marker group + [ ]{0,2} # Zero, one, or two spaces. + \1 # Marker character + ){2,} # Group repeated at least twice + [ ]* # Tailing spaces + $ # End of line. + }mx', "\n".$this->hashBlock("empty_element_suffix")."\n", $text); } @@ -603,9 +572,9 @@ class Markdown_Parser { # These are all the transformations that occur *within* block-level # tags like paragraphs, headers, and list items. # - "escapeSpecialCharsWithinTagAttributes" => -20, - "doCodeSpans" => -10, - "encodeBackslashEscapes" => -5, + # Process character escapes, code spans, and inline HTML + # in one shot. + "parseSpan" => -30, # Process anchor and image tags. Images must come first, # because ![foo][f] looks like an anchor. @@ -636,35 +605,11 @@ class Markdown_Parser { function doHardBreaks($text) { # Do hard breaks: - $br_tag = $this->hashSpan("empty_element_suffix\n"); - return preg_replace('/ {2,}\n/', $br_tag, $text); + return preg_replace_callback('/ {2,}\n/', + array(&$this, '_doHardBreaks_callback'), $text); } - - - function escapeSpecialCharsWithinTagAttributes($text) { - # - # Within tags -- meaning between < and > -- encode [\ ` * _] so they - # don't conflict with their use in Markdown for code, italics and strong. - # We're replacing each such character with its corresponding MD5 checksum - # value; this is likely overkill, but it should prevent us from colliding - # with the escape values by accident. - # - if ($this->no_markup) return $text; - - $tokens = $this->tokenizeHTML($text); - $text = ''; # rebuild $text from the tokens - - foreach ($tokens as $cur_token) { - if ($cur_token[0] == 'tag') { -// $cur_token[1] = str_replace('\\', $this->escape_table['\\'], $cur_token[1]); -// $cur_token[1] = str_replace('`', $this->escape_table['`'], $cur_token[1]); -// $cur_token[1] = str_replace('*', $this->escape_table['*'], $cur_token[1]); -// $cur_token[1] = str_replace('_', $this->escape_table['_'], $cur_token[1]); - $cur_token[1] = $this->hashSpan($cur_token[1]); - } - $text .= $cur_token[1]; - } - return $text; + function _doHardBreaks_callback($matches) { + return $this->hashPart("empty_element_suffix\n"); } @@ -765,7 +710,7 @@ class Markdown_Parser { $link_text = $this->runSpanGamut($link_text); $result .= ">$link_text"; - $result = $this->hashSpan($result); + $result = $this->hashPart($result); } else { $result = $whole_match; @@ -790,7 +735,7 @@ class Markdown_Parser { $link_text = $this->runSpanGamut($link_text); $result .= ">$link_text"; - return $this->hashSpan($result); + return $this->hashPart($result); } @@ -867,7 +812,7 @@ class Markdown_Parser { $result .= " title=\"$title\""; } $result .= $this->empty_element_suffix; - $result = $this->hashSpan($result); + $result = $this->hashPart($result); } else { # If there's no such link ID, leave intact: @@ -890,7 +835,7 @@ class Markdown_Parser { } $result .= $this->empty_element_suffix; - return $this->hashSpan($result); + return $this->hashPart($result); } @@ -902,10 +847,8 @@ class Markdown_Parser { # Header 2 # -------- # - $text = preg_replace_callback('{ ^(.+?)[ ]*\n=+[ ]*\n+ }mx', - array(&$this, '_doHeaders_callback_setext_h1'), $text); - $text = preg_replace_callback('{ ^(.+?)[ ]*\n-+[ ]*\n+ }mx', - array(&$this, '_doHeaders_callback_setext_h2'), $text); + $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx', + array(&$this, '_doHeaders_callback_setext'), $text); # atx-style headers: # # Header 1 @@ -926,12 +869,9 @@ class Markdown_Parser { return $text; } - function _doHeaders_callback_setext_h1($matches) { - $block = "

".$this->runSpanGamut($matches[1])."

"; - return "\n" . $this->hashBlock($block) . "\n\n"; - } - function _doHeaders_callback_setext_h2($matches) { - $block = "

".$this->runSpanGamut($matches[1])."

"; + function _doHeaders_callback_setext($matches) { + $level = $matches[2]{0} == '=' ? 1 : 2; + $block = "".$this->runSpanGamut($matches[1]).""; return "\n" . $this->hashBlock($block) . "\n\n"; } function _doHeaders_callback_atx($matches) { @@ -1091,8 +1031,8 @@ class Markdown_Parser { $text = preg_replace_callback('{ (?:\n\n|\A) ( # $1 = the code block -- one or more lines, starting with a space/tab - (?: - (?:[ ]{'.$this->tab_width.'} | \t) # Lines must start with a tab or a tab-width of spaces + (?> + [ ]{'.$this->tab_width.'} # Lines must start with a tab or a tab-width of spaces .*\n+ )+ ) @@ -1105,82 +1045,23 @@ class Markdown_Parser { function _doCodeBlocks_callback($matches) { $codeblock = $matches[1]; - $codeblock = $this->encodeCode($this->outdent($codeblock)); -// $codeblock = $this->detab($codeblock); - # trim leading newlines and trailing whitespace + $codeblock = $this->outdent($codeblock); + $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES); + + # trim leading newlines and trailing newlines $codeblock = preg_replace(array('/\A\n+/', '/\n+\z/'), '', $codeblock); - $result = "\n\n".$this->hashBlock("
" . $codeblock . "\n
")."\n\n"; - - return $result; + $codeblock = "
$codeblock\n
"; + return "\n\n".$this->hashBlock($codeblock)."\n\n"; } - function doCodeSpans($text) { + function makeCodeSpan($code) { # - # * Backtick quotes are used for spans. + # Create a code span markup for $code. Called from handleSpanToken. # - # * You can use multiple backticks as the delimiters if you want to - # include literal backticks in the code span. So, this input: - # - # Just type ``foo `bar` baz`` at the prompt. - # - # Will translate to: - # - #

Just type foo `bar` baz at the prompt.

- # - # There's no arbitrary limit to the number of backticks you - # can use as delimters. If you need three consecutive backticks - # in your code, use four for delimiters, etc. - # - # * You can use spaces to get literal backticks at the edges: - # - # ... type `` `bar` `` ... - # - # Turns to: - # - # ... type `bar` ... - # - $text = preg_replace_callback('@ - (?encodeCode($c); - return $this->hashSpan("$c"); - } - - - function encodeCode($_) { - # - # Encode/escape certain characters inside Markdown code runs. - # The point is that in code, these characters are literals, - # and lose their special Markdown meanings. - # - # Encode all ampersands; HTML entities are not - # entities within a Markdown code span. - $_ = str_replace('&', '&', $_); - - # Do the angle bracket song and dance: - $_ = str_replace(array('<', '>'), - array('<', '>'), $_); - - # Now, escape characters that are magic in Markdown: -// $_ = str_replace(array_keys($this->escape_table), -// array_values($this->escape_table), $_); - - return $_; + $code = htmlspecialchars(trim($code), ENT_NOQUOTES); + return $this->hashPart("$code"); } @@ -1217,19 +1098,19 @@ class Markdown_Parser { function _doItalicAndBold_em_callback($matches) { $text = $matches[2]; $text = $this->runSpanGamut($text); - return $this->hashSpan("$text"); + return $this->hashPart("$text"); } function _doItalicAndBold_strong_callback($matches) { $text = $matches[2]; $text = $this->runSpanGamut($text); - return $this->hashSpan("$text"); + return $this->hashPart("$text"); } function doBlockQuotes($text) { $text = preg_replace_callback('/ ( # Wrap whole match in $1 - ( + (?> ^[ ]*>[ ]? # ">" at the start of a line .+\n # rest of the first line (.+\n)* # subsequent consecutive lines @@ -1273,24 +1154,21 @@ class Markdown_Parser { $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY); # - # Wrap

tags. + # Wrap

tags and unhashify HTML blocks # foreach ($grafs as $key => $value) { - if (!isset( $this->html_blocks[$value] )) { + if (!preg_match('/^B\x1A[0-9]+B$/', $value)) { + # Is a paragraph. $value = $this->runSpanGamut($value); $value = preg_replace('/^([ ]*)/', "

", $value); $value .= "

"; $grafs[$key] = $this->unhash($value); } - } - - # - # Unhashify HTML blocks - # - foreach ($grafs as $key => $graf) { - # Modify elements of @grafs in-place... - if (isset($this->html_blocks[$graf])) { - $block = $this->html_blocks[$graf]; + else { + # Is a block. + # Modify elements of @grafs in-place... + $graf = $value; + $block = $this->html_hashes[$graf]; $graf = $block; // if (preg_match('{ // \A @@ -1355,20 +1233,6 @@ class Markdown_Parser { } - function encodeBackslashEscapes($text) { - # - # Parameter: String. - # Returns: The string, with after processing the following backslash - # escape sequences. - # - # Must process escaped backslashes first (should be first in list). - foreach ($this->backslash_escape_table as $search => $replacement) { - $text = str_replace($search, $this->hashSpan($replacement), $text); - } - return $text; - } - - function doAutoLinks($text) { $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}', array(&$this, '_doAutoLinks_url_callback'), $text); @@ -1391,12 +1255,12 @@ class Markdown_Parser { function _doAutoLinks_url_callback($matches) { $url = $this->encodeAmpsAndAngles($matches[1]); $link = "$url"; - return $this->hashSpan($link); + return $this->hashPart($link); } function _doAutoLinks_email_callback($matches) { $address = $matches[1]; $link = $this->encodeEmailAddress($address); - return $this->hashSpan($link); + return $this->hashPart($link); } @@ -1441,73 +1305,83 @@ class Markdown_Parser { } - function tokenizeHTML($str) { + function parseSpan($str) { # - # Parameter: String containing HTML + Markdown markup. - # Returns: An array of the tokens comprising the input - # string. Each token is either a tag or a run of text - # between tags. Each element of the array is a - # two-element array; the first is either 'tag' or 'text'; - # the second is the actual value. - # Note: Markdown code spans are taken into account: no tag token is - # generated within a code span. + # Take the string $str and parse it into tokens, hashing embeded HTML, + # escaped characters and handling code spans. # - $tokens = array(); - - while ($str != "") { - # - # Each loop iteration seach for either the next tag or the next - # openning code span marker. If a code span marker is found, the - # code span is extracted in entierty and will result in an extra - # text token. - # - $parts = preg_split('{ + $output = ''; + + $regex = '{ ( + \\\\['.preg_quote($this->escape_chars).'] + | (?no_markup ? '' : ' | # comment | <\?.*?\?> | <%.*?%> # processing instruction | <[/!$]?[-a-zA-Z0-9:]+ # regular tags - (?: + (?> \s (?>[^"\'>]+|"[^"]*"|\'[^\']*\')* )? > + ').' ) - }xs', $str, 2, PREG_SPLIT_DELIM_CAPTURE); + }xs'; + + while (1) { + # + # Each loop iteration seach for either the next tag, the next + # openning code span marker, or the next escaped character. + # Each token is then passed to handleSpanToken. + # + $parts = preg_split($regex, $str, 2, PREG_SPLIT_DELIM_CAPTURE); # Create token from text preceding tag. if ($parts[0] != "") { - $tokens[] = array('text', $parts[0]); + $output .= $parts[0]; } # Check if we reach the end. - if (count($parts) < 3) { - break; + if (isset($parts[1])) { + $output .= $this->handleSpanToken($parts[1], $parts[2]); + $str = $parts[2]; } - - # Create token from tag or code span. - if ($parts[1]{0} == "`") { - $tokens[] = array('text', $parts[1]); - $str = $parts[2]; - - # Skip the whole code span, pass as text token. - if (preg_match('/^(.*(?hashPart("&#". ord($token{1}). ";"); + case "`": + # Search for end marker in remaining text. + if (preg_match('/^(.*?[^`])'.$token.'(?!`)(.*)$/sm', + $str, $matches)) + { + $str = $matches[2]; + $codespan = $this->makeCodeSpan($matches[1]); + return $this->hashPart($codespan); + } + return $token; // return as text since no ending marker found. + default: + return $this->hashPart($token); + } } @@ -1515,7 +1389,7 @@ class Markdown_Parser { # # Remove one level of line-leading tabs or spaces # - return preg_replace("/^(\\t|[ ]{1,$this->tab_width})/m", "", $text); + return preg_replace('/^(\t|[ ]{1,'.$this->tab_width.'})/m', '', $text); } @@ -1531,26 +1405,46 @@ class Markdown_Parser { # tab characters. Then we reconstruct every line by adding the # appropriate number of space between each blocks. - $strlen = $this->utf8_strlen; # strlen function for UTF-8. - $lines = explode("\n", $text); - $text = ""; + $text = preg_replace_callback('/^.*\t.*$/m', + array(&$this, '_detab_callback'), $text); - foreach ($lines as $line) { - # Split in blocks. - $blocks = explode("\t", $line); - # Add each blocks to the line. - $line = $blocks[0]; - unset($blocks[0]); # Do not add first block twice. - foreach ($blocks as $block) { - # Calculate amount of space, insert spaces, insert block. - $amount = $this->tab_width - - $strlen($line, 'UTF-8') % $this->tab_width; - $line .= str_repeat(" ", $amount) . $block; - } - $text .= "$line\n"; - } +// $strlen = $this->utf8_strlen; # strlen function for UTF-8. +// $lines = explode("\n", $text); +// $text = ""; +// +// foreach ($lines as $line) { +// # Split in blocks. +// $blocks = explode("\t", $line); +// # Add each blocks to the line. +// $line = $blocks[0]; +// unset($blocks[0]); # Do not add first block twice. +// foreach ($blocks as $block) { +// # Calculate amount of space, insert spaces, insert block. +// $amount = $this->tab_width - +// $strlen($line, 'UTF-8') % $this->tab_width; +// $line .= str_repeat(" ", $amount) . $block; +// } +// $text .= "$line\n"; +// } return $text; } + function _detab_callback($matches) { + $line = $matches[0]; + $strlen = $this->utf8_strlen; # strlen function for UTF-8. + + # Split in blocks. + $blocks = explode("\t", $line); + # Add each blocks to the line. + $line = $blocks[0]; + unset($blocks[0]); # Do not add first block twice. + foreach ($blocks as $block) { + # Calculate amount of space, insert spaces, insert block. + $amount = $this->tab_width - + $strlen($line, 'UTF-8') % $this->tab_width; + $line .= str_repeat(" ", $amount) . $block; + } + return $line; + } function _initDetab() { # # Check for the availability of the function in the `utf8_strlen` property @@ -1569,13 +1463,15 @@ class Markdown_Parser { # # Swap back in all the tags hashed by _HashHTMLBlocks. # - return str_replace(array_keys($this->html_hashes), - array_values($this->html_hashes), $text); + return preg_replace_callback('/(.)\x1A[0-9]+\1/', + array(&$this, '_unhash_callback'), $text); + } + function _unhash_callback($matches) { + return $this->html_hashes[$matches[0]]; } } - /* PHP Markdown @@ -1618,6 +1514,8 @@ Version History See the readme file for detailed release notes for this version. +1.0.1i (28 Aug 2007) + 1.0.1h (3 Aug 2007) 1.0.1g (3 Jul 2007)