Merge branch 'master' into extra

Conflicts: PHP Markdown Extra Readme.text
2009-10-01 14:01:35 -04:00 · 2009-10-01 14:01:35 -04:00 · c04811a89b
commit c04811a89b
parent 45b858661f d64613d64a
2 changed files with 80 additions and 26 deletions
--- a/Readme.text
+++ b/Readme.text
@ -204,6 +204,38 @@ expected; (3) the output PHP Markdown actually produced.
 Version History
 ---------------

+Current:
+
+*	Fix for adjacent list of different kind where the second list could
+	end as a sublist of the first when not separated by an empty line.
+
+*	Now accepting many valid email addresses in autolinks that were 
+	previously rejected, such as:
+	
+		<abc+mailbox/department=shipping@example.com>
+		<!#$%&'*+-/=?^_`.{|}~@example.com>
+		<"abc@def"@example.com>
+		<"Fred Bloggs"@example.com>
+		<jsmith@[192.0.2.1]>
+
+*	Now accepting spaces in URLs for inline and reference-style links. Such 
+	URLs need to be surrounded by angle brakets. For instance:
+	
+		[link text](<http://url/with space> "optional title")
+
+		[link text][ref]
+		[ref]: <http://url/with space> "optional title"
+	
+	There is still a quirk which may prevent this from working correctly with 
+	relative URLs in inline-style links however.
+
+*	Fixed a bug where inline-style links wouldn't be recognized when the link 
+	definition contains a line break between the url and the title.
+
+*	Fixed a bug where tags where the name contains an underscore aren't parsed 
+	correctly.
+
+
 Current Extra:

 *	Fixed a problem where unterminated tags in indented code blocks could 
--- a/markdown.php
+++ b/markdown.php
@ -356,14 +356,18 @@ class Markdown_Parser {
 							  [ ]*
 							  \n?				# maybe *one* newline
 							  [ ]*
-							<?(\S+?)>?			# url = $2
+							(?:
+							  <(.+?)>			# url = $2
+							|
+							  (\S+?)			# url = $3
+							)
 							  [ ]*
 							  \n?				# maybe one newline
 							  [ ]*
 							(?:
 								(?<=\s)			# lookbehind for whitespace
 								["(]
-								(.*?)			# title = $3
+								(.*?)			# title = $4
 								[")]
 								[ ]*
 							)?	# title is optional
@ -375,8 +379,9 @@ class Markdown_Parser {
 	}
 	function _stripLinkDefinitions_callback($matches) {
 		$link_id = strtolower($matches[1]);
-		$this->urls[$link_id] = $matches[2];
-		$this->titles[$link_id] =& $matches[3];
+		$url = $matches[2] == '' ? $matches[3] : $matches[2];
+		$this->urls[$link_id] = $url;
+		$this->titles[$link_id] =& $matches[4];
 		return ''; # String that will replace the block
 	}

@ -699,28 +704,28 @@ class Markdown_Parser {
 				('.$this->nested_brackets_re.')	# link text = $2
 			  \]
 			  \(			# literal paren
-				[ ]*
+				[ \n]*
 				(?:
-					<(\S*)>	# href = $3
+					<(.+?)>	# href = $3
 				|
 					('.$this->nested_url_parenthesis_re.')	# href = $4
 				)
-				[ ]*
+				[ \n]*
 				(			# $5
 				  ([\'"])	# quote char = $6
 				  (.*?)		# Title = $7
 				  \6		# matching quote
-				  [ ]*	# ignore any spaces/tabs between closing quote and )
+				  [ \n]*	# ignore any spaces/tabs between closing quote and )
 				)?			# title is optional
 			  \)
 			)
 			}xs',
-			array(&$this, '_DoAnchors_inline_callback'), $text);
+			array(&$this, '_doAnchors_inline_callback'), $text);

 		#
 		# Last, handle reference-style shortcuts: [link text]
-		# These must come last in case you've also got [link test][1]
-		# or [link test](/foo)
+		# These must come last in case you've also got [link text][1]
+		# or [link text](/foo)
 		#
 //		$text = preg_replace_callback('{
 //			(					# wrap whole match in $1
@ -824,18 +829,18 @@ class Markdown_Parser {
 			  \]
 			  \s?			# One optional whitespace character
 			  \(			# literal paren
-				[ ]*
+				[ \n]*
 				(?:
 					<(\S*)>	# src url = $3
 				|
 					('.$this->nested_url_parenthesis_re.')	# src url = $4
 				)
-				[ ]*
+				[ \n]*
 				(			# $5
 				  ([\'"])	# quote char = $6
 				  (.*?)		# title = $7
 				  \6		# matching quote
-				  [ ]*
+				  [ \n]*
 				)?			# title is optional
 			  \)
 			)
@ -948,19 +953,22 @@ class Markdown_Parser {
 		$marker_ol_re  = '\d+[.]';
 		$marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";

-		$markers_relist = array($marker_ul_re, $marker_ol_re);
+		$markers_relist = array(
+			$marker_ul_re => $marker_ol_re,
+			$marker_ol_re => $marker_ul_re,
+			);

-		foreach ($markers_relist as $marker_re) {
+		foreach ($markers_relist as $marker_re => $other_marker_re) {
 			# Re-usable pattern to match any entirel ul or ol list:
 			$whole_list_re = '
 				(								# $1 = whole list
 				  (								# $2
-					[ ]{0,'.$less_than_tab.'}
-					('.$marker_re.')			# $3 = first list item marker
+					([ ]{0,'.$less_than_tab.'})	# $3 = number of spaces
+					('.$marker_re.')			# $4 = first list item marker
 					[ ]+
 				  )
 				  (?s:.+?)
-				  (								# $4
+				  (								# $5
 					  \z
 					|
 					  \n{2,}
@ -969,6 +977,12 @@ class Markdown_Parser {
 						[ ]*
 						'.$marker_re.'[ ]+
 					  )
+					|
+					  (?=						# Lookahead for another kind of list
+					    \n
+						\3						# Must have the same indentation
+						'.$other_marker_re.'[ ]+
+					  )
 				  )
 				)
 			'; // mx
@ -1001,7 +1015,7 @@ class Markdown_Parser {
 		$marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
 		
 		$list = $matches[1];
-		$list_type = preg_match("/$marker_ul_re/", $matches[3]) ? "ul" : "ol";
+		$list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol";
 		
 		$marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );
 		
@ -1145,7 +1159,7 @@ class Markdown_Parser {
 	
 	function prepareItalicsAndBold() {
 	#
-	# Prepare regular expressions for seraching emphasis tokens in any
+	# Prepare regular expressions for searching emphasis tokens in any
 	# context.
 	#
 		foreach ($this->em_relist as $em => $em_re) {
@ -1180,7 +1194,7 @@ class Markdown_Parser {
 			$token_re = $this->em_strong_prepared_relist["$em$strong"];
 			
 			#
-			# Each loop iteration seach for the next emphasis token. 
+			# Each loop iteration search for the next emphasis token. 
 			# Each token is then passed to handleSpanToken.
 			#
 			$parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
@ -1313,7 +1327,7 @@ class Markdown_Parser {
 		# These leading spaces cause problem with <pre> content, 
 		# so we need to fix that:
 		$bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx', 
-			array(&$this, '_DoBlockQuotes_callback2'), $bq);
+			array(&$this, '_doBlockQuotes_callback2'), $bq);

 		return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
 	}
@ -1435,9 +1449,17 @@ class Markdown_Parser {
 			<
 			(?:mailto:)?
 			(
-				[-.\w\x80-\xFF]+
+				(?:
+					[-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
+				|
+					".*?"
+				)
 				\@
-				[-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
+				(?:
+					[-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
+				|
+					\[[\d.a-fA-F:]+\]	# IPv4 & IPv6
+				)
 			)
 			>
 			}xi',
@ -1517,7 +1539,7 @@ class Markdown_Parser {
 				|
 					<\?.*?\?> | <%.*?%>		# processing instruction
 				|
-					<[/!$]?[-a-zA-Z0-9:]+	# regular tags
+					<[/!$]?[-a-zA-Z0-9:_]+	# regular tags
 					(?>
 						\s
 						(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*