must go first:
$text = preg_replace_callback('{
( # $1: Marker
- (?:
@@ -1207,9 +1209,10 @@ class Markdown_Parser {
$bq = $this->runBlockGamut($bq); # recurse
$bq = preg_replace('/^/m', " ", $bq);
- # These leading spaces screw with content, so we need to fix that:
+ # These leading spaces cause problems with content,
+ # so we need to fix that:
$bq = preg_replace_callback('{(\s*.+?
)}sx',
- array(&$this, '_DoBlockQuotes_callback2'), $bq);
+ array(&$this, '_DoBlockQuotes_callback2'), $bq);
return $this->hashBlock("\n$bq\n
")."\n\n";
}
@@ -1245,52 +1248,46 @@ class Markdown_Parser {
#
# Unhashify HTML blocks
#
-// foreach ($grafs as $key => $value) {
-// if (isset( $this->html_blocks[$value] )) {
-// $grafs[$key] = $this->html_blocks[$value];
-// }
-// }
-
foreach ($grafs as $key => $graf) {
# Modify elements of @grafs in-place...
if (isset($this->html_blocks[$graf])) {
$block = $this->html_blocks[$graf];
$graf = $block;
- if (preg_match('{
- \A
- ( # $1 = tag
-
]*
- \b
- markdown\s*=\s* ([\'"]) # $2 = attr quote char
- 1
- \2
- [^>]*
- >
- )
- ( # $3 = contents
- .*
- )
- (
) # $4 = closing tag
- \z
- }xs', $block, $matches))
- {
- list(, $div_open, , $div_content, $div_close) = $matches;
-
- # We can't call Markdown(), because that resets the hash;
- # that initialization code should be pulled into its own sub, though.
- $div_content = $this->hashHTMLBlocks($div_content);
-
- # Run document gamut methods on the content.
- foreach ($this->document_gamut as $method => $priority) {
- $div_content = $this->$method($div_content);
- }
-
- $div_open = preg_replace(
- '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open);
-
- $graf = $div_open . "\n" . $div_content . "\n" . $div_close;
- }
+// if (preg_match('{
+// \A
+// ( # $1 =
tag
+//
]*
+// \b
+// markdown\s*=\s* ([\'"]) # $2 = attr quote char
+// 1
+// \2
+// [^>]*
+// >
+// )
+// ( # $3 = contents
+// .*
+// )
+// (
) # $4 = closing tag
+// \z
+// }xs', $block, $matches))
+// {
+// list(, $div_open, , $div_content, $div_close) = $matches;
+//
+// # We can't call Markdown(), because that resets the hash;
+// # that initialization code should be pulled into its own sub, though.
+// $div_content = $this->hashHTMLBlocks($div_content);
+//
+// # Run document gamut methods on the content.
+// foreach ($this->document_gamut as $method => $priority) {
+// $div_content = $this->$method($div_content);
+// }
+//
+// $div_open = preg_replace(
+// '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open);
+//
+// $graf = $div_open . "\n" . $div_content . "\n" . $div_close;
+// }
$grafs[$key] = $graf;
}
}
@@ -1403,21 +1400,23 @@ class Markdown_Parser {
function tokenizeHTML($str) {
#
- # Parameter: String containing HTML markup.
+ # Parameter: String containing HTML + Markdown markup.
# Returns: An array of the tokens comprising the input
- # string. Each token is either a tag (possibly with nested,
- # tags contained therein, such as
, or a
- # run of text between tags. Each element of the array is a
+ # string. Each token is either a tag or a run of text
+ # between tags. Each element of the array is a
# two-element array; the first is either 'tag' or 'text';
# the second is the actual value.
- # Note: Takes code spans into account and does not generate tag
- # tokens inside code spans.
+ # Note: Markdown code spans are taken into account: no tag token is
+ # generated within a code span.
#
$tokens = array();
while ($str != "") {
#
- #
+ # Each loop iteration searches for either the next tag or the next
+ # opening code span marker. If a code span marker is found, the
+ # code span is extracted in its entirety and will result in an extra
+ # text token.
#
$parts = preg_split('{
(
@@ -1496,7 +1495,8 @@ class Markdown_Parser {
unset($blocks[0]); # Do not add first block twice.
foreach ($blocks as $block) {
# Calculate amount of space, insert spaces, insert block.
- $amount = $this->tab_width - strlen($line) % $this->tab_width;
+ $amount = $this->tab_width -
+ mb_strlen($line, 'UTF-8') % $this->tab_width;
$line .= str_repeat(" ", $amount) . $block;
}
$text .= "$line\n";
@@ -1558,73 +1558,7 @@ Version History
See the readme file for detailed release notes for this version.
-1.0.2b7 (16 Sep 2006)
-
-* Changed span and block gamut methods so that they loop over a
- customizable list of methods. This makes subclassing the parser a more
- interesting option for creating syntax extensions.
-
-* Also added a "document" gamut loop which can be used to hook document-level
- methods (like for striping link definitions).
-
-* Changed all methods which were inserting HTML code so that they now return
- a hashed representation of the code. New methods `hashSpan` and `hashBlock`
- are used to hash respectivly span- and block-level generated content. This
- has a couple of significant effects:
-
- 1. It prevents invalid nesting of Markdown-generated elements which
- could occur occuring with constructs like `*something [link*][1]`.
- 2. It prevents problems occuring with deeply nested lists on which
- paragraphs were ill-formed.
- 3. It removes the need to call `hashHTMLBlocks` twice during the the
- block gamut.
-
- Hashes are turned back to HTML prior output.
-
-* Made the block-level HTML parser smarter using a specially-crafted regular
- expression capable of handling nested tags.
-
-* Solved backtick issues in tag attributes by rewriting the HTML tokenizer to
- be aware of code spans. All these lines should work correctly now:
-
- bar
- bar
- ``
-
-* `` has been added to the list of block-level elements and is now
- treated as an HTML block instead of being wrapped within paragraph tags.
-
-* Now only trim trailing newlines from code blocks, instead of trimming
- all trailing whitespace characters.
-
-* Fixed bug where this:
-
- [text](http://m.com "title" )
-
- wasn't working as expected, because the parser wasn't allowing for spaces
- before the closing paren.
-
-* Filthy hack to support markdown='1' in div tags.
-
-* _DoAutoLinks() now supports the 'dict://' URL scheme.
-
-* PHP- and ASP-style processor instructions are now protected as
- raw HTML blocks.
-
- ... ?>
- <% ... %>
-
-* Experimental support for [this] as a synonym for [this][].
-
-* Fix for escaped backticks still triggering code spans:
-
- There are two raw backticks here: \` and here: \`, not a code span
-
-
-1.0.1oo (19 May 2006)
-
-* Converted PHP Markdown to a object-oriented design.
-
+1.0.1d (1 Dec 2006)
1.0.1c (9 Dec 2005)
@@ -1654,7 +1588,7 @@ Copyright (c) 2004-2006 Michel Fortin
All rights reserved.
-Copyright (c) 2003-2004 John Gruber
+Copyright (c) 2003-2006 John Gruber
All rights reserved.