This commit is contained in:
Zander Thannhauser 2025-12-07 12:44:46 -06:00
parent c67cccc31f
commit 4d6169ff59
3 changed files with 148 additions and 132 deletions

View file

@ -109,7 +109,7 @@ class MarkdownExtra extends \Michelf\Markdown {
public function __construct()
{
$this->enter(__FUNCTION__);
// Add extra escapable characters before parent constructor
// initialize the table.
$this->escape_chars .= ':|';
@ -123,22 +123,22 @@ class MarkdownExtra extends \Michelf\Markdown {
"stripAbbreviations" => 25,
"appendFootnotes" => 50,
);
$this->block_gamut += array(
"doFencedCodeBlocks" => 5,
"doTables" => 15,
"doDefLists" => 45,
);
$this->span_gamut += array(
"doFootnotes" => 5,
"doAbbreviations" => 70,
);
$this->enhanced_ordered_list = true;
parent::__construct();
$this->exit(__FUNCTION__);
}
@ -156,7 +156,7 @@ class MarkdownExtra extends \Michelf\Markdown {
protected array $tom_cells = array();
protected array $tom_cells_type = array();
protected array $tom_cells_stack = array();
/**
* Give the current footnote number.
*/
@ -173,7 +173,7 @@ class MarkdownExtra extends \Michelf\Markdown {
protected function setup()
{
$this->enter(__FUNCTION__);
parent::setup();
$this->footnotes = array();
@ -188,14 +188,14 @@ class MarkdownExtra extends \Michelf\Markdown {
$this->tom_cells = array();
$this->tom_cells_type = array();
$this->tom_cells_stack = array();
foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
if ($this->abbr_word_re)
$this->abbr_word_re .= '|';
$this->abbr_word_re .= preg_quote($abbr_word);
$this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
}
$this->exit(__FUNCTION__);
}
@ -204,7 +204,7 @@ class MarkdownExtra extends \Michelf\Markdown {
*/
protected function teardown() {
$this->enter(__FUNCTION__);
$this->footnotes = array();
$this->footnotes_ordered = array();
$this->footnotes_ref_count = array();
@ -216,7 +216,7 @@ class MarkdownExtra extends \Michelf\Markdown {
$this->footnotes_assembled = null;
parent::teardown();
$this->exit(__FUNCTION__);
}
@ -299,7 +299,7 @@ class MarkdownExtra extends \Michelf\Markdown {
protected function stripLinkDefinitions($text)
{
$this->enter(__FUNCTION__);
$less_than_tab = $this->tab_width - 1;
// Link defs are in the form: ^[id]: url "optional title"
@ -328,9 +328,9 @@ class MarkdownExtra extends \Michelf\Markdown {
}xm',
array($this, '_stripLinkDefinitions_callback'),
$text);
$this->exit(__FUNCTION__);
return $text;
}
@ -342,15 +342,15 @@ class MarkdownExtra extends \Michelf\Markdown {
protected function _stripLinkDefinitions_callback($matches)
{
// Records one matched link definition (URL, title, extra attributes) keyed
// by its id, and returns the empty string so the definition is removed from
// the text. $matches holds the regex capture groups; the group meanings
// noted below are read off how each group is used here.
$this->enter(__FUNCTION__);
// Ids are stored lowercased.
$link_id = strtolower($matches[1]);
// Group 2 is preferred as the URL; group 3 is used when group 2 is empty.
$url = $matches[2] == '' ? $matches[3] : $matches[2];
$this->urls[$link_id] = $url;
// Bound by reference (=&) instead of copied. NOTE(review): presumably so a
// missing optional title group yields null rather than an undefined-index
// notice -- confirm against the regex.
$this->titles[$link_id] =& $matches[4];
// Same by-reference trick for group 5 via the throwaway $dummy; the result
// of doExtraAttributes() for an empty tag name is stored per link id.
$this->ref_attr[$link_id] = $this->doExtraAttributes("", $dummy =& $matches[5]);
$this->exit(__FUNCTION__);
return ''; // String that will replace the block
}
@ -1129,7 +1129,7 @@ class MarkdownExtra extends \Michelf\Markdown {
array($this, '_doHeaders_callback_atx'), $text);
$this->exit(__FUNCTION__);
return $text;
}
@ -1177,7 +1177,7 @@ class MarkdownExtra extends \Michelf\Markdown {
protected function doTables($text)
{
$this->enter(__FUNCTION__);
$less_than_tab = $this->tab_width - 1;
// Find tables with leading pipe.
//
@ -1265,7 +1265,7 @@ class MarkdownExtra extends \Michelf\Markdown {
}
$classname = str_replace('%%', $alignname, $this->table_align_class_tmpl);
return " class=\"$classname\"";
}
@ -1277,9 +1277,9 @@ class MarkdownExtra extends \Michelf\Markdown {
protected function _doTable_callback($matches)
{
$this->enter(__FUNCTION__);
// array_shift($matches);
$head = $matches[1];
$underline = $matches[2];
$content = $matches[3];
@ -1347,33 +1347,33 @@ class MarkdownExtra extends \Michelf\Markdown {
// Write column headers.
$table_attr_str = $this->doExtraAttributes('table', $id_class, null, []);
$text = "<table$table_attr_str>\n";
$text = "<table$table_attr_str>\n";
$text .= "<thead>\n";
$text .= "<tr>\n";
foreach ($headers as $n => $header)
{
if (preg_match('/\[([a-zA-Z]+)\]/', $header, $matches))
if (preg_match('/\[@([a-zA-Z]+)\]/', $header, $matches))
{
$label = $matches[1];
if ( !in_array($label, $this->tom_cells_stack)
&& array_key_exists($label, $this->tom_cells))
{
array_push($this->tom_cells_stack, $label);
$local_content = $this->tom_cells[$label];
if ($this->tom_cells_type[$label] == "span")
{
$processed = $this->runSpanGamut($local_content);
}
else
{
$processed = $this->runBasicBlockGamut($local_content);
}
array_pop($this->tom_cells_stack);
array_push($this->tom_cells_stack, $label);
$local_content = $this->tom_cells[$label];
if ($this->tom_cells_type[$label] == "span")
{
$processed = $this->runSpanGamut($local_content);
}
else
{
$processed = $this->runBasicBlockGamut($local_content);
}
array_pop($this->tom_cells_stack);
$text .= " <th$attr[$n]>" . $processed . "</th>\n";
}
else
@ -1409,14 +1409,14 @@ class MarkdownExtra extends \Michelf\Markdown {
if (preg_match('/ *\[([a-zA-Z]+)\] */', $cell, $matches))
{
$label = $matches[1];
if ( !in_array($label, $this->tom_cells_stack)
&& array_key_exists($label, $this->tom_cells))
{
array_push($this->tom_cells_stack, $label);
$local_content = $this->tom_cells[$label];
if ($this->tom_cells_type[$label] == "span")
{
$processed = $this->runSpanGamut($local_content);
@ -1425,9 +1425,9 @@ class MarkdownExtra extends \Michelf\Markdown {
{
$processed = $this->runBasicBlockGamut($local_content);
}
array_pop($this->tom_cells_stack);
$text .= " <{$column_tags[$n]}$attr[$n]>" . $processed . "</td>\n";
}
else
@ -1440,14 +1440,14 @@ class MarkdownExtra extends \Michelf\Markdown {
$text .= " <{$column_tags[$n]}$attr[$n]>" . $this->runSpanGamut(trim($cell)) . "</td>\n";
}
}
$text .= "</tr>\n";
}
$text .= "</tbody>\n";
$text .= "</table>";
$this->exit(__FUNCTION__);
return $this->hashBlock($text) . "\n";
}
@ -1459,7 +1459,7 @@ class MarkdownExtra extends \Michelf\Markdown {
protected function doDefLists($text)
{
$this->enter(__FUNCTION__);
$less_than_tab = $this->tab_width - 1;
// Re-usable pattern to match any entire dl list:
@ -1497,7 +1497,7 @@ class MarkdownExtra extends \Michelf\Markdown {
array($this, '_doDefLists_callback'), $text);
$this->exit(__FUNCTION__);
return $text;
}
@ -1508,7 +1508,7 @@ class MarkdownExtra extends \Michelf\Markdown {
*/
protected function _doDefLists_callback($matches) {
$this->enter(__FUNCTION__);
// Re-usable patterns to match list item bullets and number markers:
$list = $matches[1];
@ -1516,9 +1516,9 @@ class MarkdownExtra extends \Michelf\Markdown {
// paragraph for the last item in a list, if necessary:
$result = trim($this->processDefListItems($list));
$result = "<dl>\n" . $result . "\n</dl>";
$this->exit(__FUNCTION__);
return $this->hashBlock($result) . "\n\n";
}
@ -1530,7 +1530,7 @@ class MarkdownExtra extends \Michelf\Markdown {
*/
protected function processDefListItems($list_str) {
$this->enter(__FUNCTION__);
$less_than_tab = $this->tab_width - 1;
@ -1569,7 +1569,7 @@ class MarkdownExtra extends \Michelf\Markdown {
array($this, '_processDefListItems_callback_dd'), $list_str);
$this->exit(__FUNCTION__);
return $list_str;
}
@ -1580,16 +1580,16 @@ class MarkdownExtra extends \Michelf\Markdown {
*/
protected function _processDefListItems_callback_dt($matches) {
// Converts the matched text in $matches[1] into one <dt> element per line
// and returns the resulting markup followed by a newline.
$this->enter(__FUNCTION__);
// The match is trimmed first, then split on newlines: one term per line.
$terms = explode("\n", trim($matches[1]));
$text = '';
foreach ($terms as $term) {
// Each term is trimmed and run through runSpanGamut() before wrapping.
$term = $this->runSpanGamut(trim($term));
// Every <dt> is emitted on its own line (leading "\n").
$text .= "\n<dt>" . $term . "</dt>";
}
$this->exit(__FUNCTION__);
return $text . "\n";
}
@ -1600,7 +1600,7 @@ class MarkdownExtra extends \Michelf\Markdown {
*/
protected function _processDefListItems_callback_dd($matches) {
$this->enter(__FUNCTION__);
$leading_line = $matches[1];
$marker_space = $matches[2];
$def = $matches[3];
@ -1617,7 +1617,7 @@ class MarkdownExtra extends \Michelf\Markdown {
}
$this->exit(__FUNCTION__);
return "\n<dd>" . $def . "</dd>\n";
}
@ -1633,7 +1633,7 @@ class MarkdownExtra extends \Michelf\Markdown {
*/
protected function doFencedCodeBlocks($text) {
$this->enter(__FUNCTION__);
$text = preg_replace_callback('{
(?:\n|\A)
# 1: Opening marker
@ -1664,7 +1664,7 @@ class MarkdownExtra extends \Michelf\Markdown {
array($this, '_doFencedCodeBlocks_callback'), $text);
$this->exit(__FUNCTION__);
return $text;
}
@ -1777,7 +1777,7 @@ class MarkdownExtra extends \Michelf\Markdown {
*/
protected function stripFootnotes($text) {
$this->enter(__FUNCTION__);
$less_than_tab = $this->tab_width - 1;
// Link defs are in the form: [^id]: url "optional title"
@ -1798,9 +1798,9 @@ class MarkdownExtra extends \Michelf\Markdown {
}xm',
array($this, '_stripFootnotes_callback'),
$text);
$this->exit(__FUNCTION__);
return $text;
}
@ -1812,79 +1812,79 @@ class MarkdownExtra extends \Michelf\Markdown {
protected function _stripFootnotes_callback($matches)
{
// Stores one matched footnote definition in $this->footnotes and returns
// the empty string so the definition is removed from the text.
$this->enter(__FUNCTION__);
// Storage key is the fn_id_prefix property followed by the captured id.
$note_id = $this->fn_id_prefix . $matches[1];
// The outdented footnote body is saved under that key; $content is kept
// only so it can be passed to zprint() below.
$content = $this->footnotes[$note_id] = $this->outdent($matches[2]);
$this->zprint("note_id = $note_id");
$this->zprint("content = $content");
$this->exit(__FUNCTION__);
return ''; // String that will replace the block
}
protected function stripTomCells($text) {
$this->enter(__FUNCTION__);
$less_than_tab = $this->tab_width - 1;
$text = preg_replace_callback('{
^[ ]{0,' . $less_than_tab . '}\[(.+?)\][ ]?: [ ]* \n
^[ ]{0,' . $less_than_tab . '}\[@(.+?)\][ ]?: [ ]* \n
(
(?>
(?!^\[\1\])
(?!^\[@\1\])
.*\n+
)+
)
# Closing marker.
\[\1\][ ]* (?= \n )
\[@\1\][ ]* (?= \n )
}xm',
array($this, '_stripTomCells_block_callback'),
$text);
$text = preg_replace_callback('{
^[ ]{0,' . $less_than_tab . '}\[(.+?)\][ ]?: (.+) \[\1\][ ]* (?= \n )
}xm',
array($this, '_stripTomCells_span_callback'),
$text);
$this->exit(__FUNCTION__);
return $text;
}
protected function _stripTomCells_span_callback($matches)
{
$this->enter(__FUNCTION__);
$this->enter(__FUNCTION__);
$this->zprint("matches[1] = {$matches[1]}");
$this->zprint("matches[2] = {$matches[2]}");
$this->zprint("matches[1] = {$matches[1]}");
$this->zprint("matches[2] = {$matches[2]}");
$this->tom_cells[$matches[1]] = trim($matches[2]);
$this->tom_cells_type[$matches[1]] = "span";
$this->tom_cells[$matches[1]] = trim($matches[2]);
$this->tom_cells_type[$matches[1]] = "span";
$this->exit(__FUNCTION__);
return '';
}
protected function _stripTomCells_block_callback($matches)
{
$this->enter(__FUNCTION__);
$this->enter(__FUNCTION__);
$this->zprint("matches[1] = {$matches[1]}");
$this->zprint("matches[2] = {$matches[2]}");
$this->zprint("matches[1] = {$matches[1]}");
$this->zprint("matches[2] = {$matches[2]}");
$this->tom_cells[$matches[1]] = $this->outdent($matches[2]);
$this->tom_cells_type[$matches[1]] = "block";
$this->exit(__FUNCTION__);
$this->tom_cells[$matches[1]] = $this->outdent($matches[2]);
$this->tom_cells_type[$matches[1]] = "block";
$this->exit(__FUNCTION__);
return '';
}
@ -1908,13 +1908,13 @@ class MarkdownExtra extends \Michelf\Markdown {
*/
protected function appendFootnotes($text) {
$this->enter(__FUNCTION__);
$text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
array($this, '_appendFootnotes_callback'), $text);
if ( ! empty( $this->footnotes_ordered ) ) {
$this->_doFootnotes();
if ( ! $this->omit_footnotes ) {
$text .= "\n\n";
$text .= "<div class=\"footnotes\" role=\"doc-endnotes\">\n";
@ -1923,9 +1923,9 @@ class MarkdownExtra extends \Michelf\Markdown {
$text .= "</div>";
}
}
$this->exit(__FUNCTION__);
return $text;
}
@ -1937,7 +1937,7 @@ class MarkdownExtra extends \Michelf\Markdown {
*/
protected function _doFootnotes() {
$this->enter(__FUNCTION__);
$attr = array();
if ($this->fn_backlink_class !== "") {
$class = $this->fn_backlink_class;
@ -2007,7 +2007,7 @@ class MarkdownExtra extends \Michelf\Markdown {
$text .= "</ol>\n";
$this->footnotes_assembled = $text;
$this->exit(__FUNCTION__);
}
@ -2085,7 +2085,7 @@ class MarkdownExtra extends \Michelf\Markdown {
*/
protected function stripAbbreviations($text) {
$this->enter(__FUNCTION__);
$less_than_tab = $this->tab_width - 1;
// Link defs are in the form: [id]*: url "optional title"
@ -2095,9 +2095,9 @@ class MarkdownExtra extends \Michelf\Markdown {
}xm',
array($this, '_stripAbbreviations_callback'),
$text);
$this->exit(__FUNCTION__);
return $text;
}
@ -2108,7 +2108,7 @@ class MarkdownExtra extends \Michelf\Markdown {
*/
protected function _stripAbbreviations_callback($matches) {
$this->enter(__FUNCTION__);
$abbr_word = $matches[1];
$abbr_desc = $matches[2];
if ($this->abbr_word_re) {
@ -2116,9 +2116,9 @@ class MarkdownExtra extends \Michelf\Markdown {
}
$this->abbr_word_re .= preg_quote($abbr_word);
$this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
$this->exit(__FUNCTION__);
return ''; // String that will replace the block
}

View file

@ -9,12 +9,12 @@ Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed auctor, nunc non fr
<table q=w>
<table q=w markdown="1">
| a | b | <p> c </p> <p> d </p> |
| - | :-: | - |
<caption> 1 2 3 | 4 5 6
<caption> q w e r t y <tr a="b"> | d | e | f |
| g <th a="b" align="left"> h </th> i |
| g <th a="b" style="text-align: left"> h </th> i |
<tr> x | y | z </tr>
x | y | z </tr> </tbody>
[ def ]
@ -58,7 +58,7 @@ Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed auctor, nunc non fr
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed auctor, nunc non fringilla pulvinar, augue arcu tempor lacus, vel gravida justo sapien vel nibh. Curabitur eget dignissim lorem. Vivamus sit amet urna nec lorem aliquet viverra. Aliquam erat volutpat. Maecenas sed ante vitae erat feugiat faucibus. Integer porttitor nibh eu risus suscipit, ut efficitur mauris sollicitudin. Suspendisse potenti. Integer feugiat mi sed ligula sagittis, id sagittis justo ullamcorper.
<table>
<table markdown="1">
|a
|-
</table>
@ -66,7 +66,7 @@ Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed auctor, nunc non fr
<table>
<table markdown="1">
|-
|b
</table>

64
test.py
View file

@ -97,7 +97,7 @@ def do_table_line(state, line):
# it ourselves
print("found our start tag");
out += m.group(0);
out += m.group(0) + "\n";
line = line[len(m.group(0)):];
@ -119,7 +119,7 @@ def do_table_line(state, line):
out += f"<{state.section_tag}>";
state.already_opened_section = 1;
out += m.group(0);
out += m.group(0) + "\n";
line = line[len(m.group(0)):];
@ -160,7 +160,7 @@ def do_table_line(state, line):
print(f'line = "{line}"');
out += "</caption>";
out += "</caption>" + "\n";
continue;
@ -168,7 +168,7 @@ def do_table_line(state, line):
if (m := re.match(passthrough_pattern, line)):
print(f'found passthrough tag: "{m.group(0)}"');
out += m.group(0);
out += m.group(0) + "\n";
line = line[len(m.group(0)):];
@ -187,11 +187,11 @@ def do_table_line(state, line):
column_index = 0;
if not state.already_opened_section:
out += f"<{state.section_tag}>";
out += f"<{state.section_tag}>" + "\n";
state.already_opened_section = 1;
if not already_open_tr:
out += "<tr>";
out += "<tr>" + "\n";
while line:
print("new cell");
@ -206,7 +206,7 @@ def do_table_line(state, line):
align = state.column_info[column_index]['align'];
if align != 'default':
attributes['align'] = align;
attributes['style'] = f'text-align: {align}';
print(f'attributes = "{attributes}"');
@ -225,7 +225,8 @@ def do_table_line(state, line):
line = line[1:];
print(f'line = "{line}"');
elif (m := re.match(r"<([a-z]+)(?:[\s]+([^<>]*))?>", line)):
# elif (m := re.match(r"<([a-z]+)(?:[\s]+([^<>]*))?>", line)):
elif (m := re.match(r"<(th|td)(?:[\s]+([^<>]*))?>", line)):
print("found opening HTML tag");
tag = m.group(1);
@ -262,16 +263,18 @@ def do_table_line(state, line):
line = line[len(m.group(0)):];
print(f'line = "{line}"');
elif (m := re.match(r"<([a-z]+)", line)):
print("found HTML open, but it's incomplete? huh?! throwing!");
# elif (m := re.match(r"<([a-z]+)", line)):
# print("found HTML open, but it's incomplete? huh?! throwing!");
raise SyntaxError("could not find '>' for HTML open tag");
# raise SyntaxError("could not find '>' for HTML open tag");
elif column_index < len(state.column_info):
print("found nothing, defaulting to column info");
tag = state.column_info[column_index]['default-tag']
print(f'tag = "{tag}"');
print(f'line = "{line}"');
else:
print(f"found nothing, defaulting default_tag ({state.default_cell_tag})");
@ -279,6 +282,8 @@ def do_table_line(state, line):
print(f'tag = "{tag}"');
print(f'line = "{line}"');
print("looking for closer");
content = "";
@ -358,20 +363,26 @@ def do_table_line(state, line):
column_index += 1;
# end the row of content
out += "\n";
# close tr on its own line
out += "</tr>" + "\n";
print(f'out = "{out}"');
return out;
def do_table(table_open_tag, header_lines, seperator_line, body_lines, optional_caption):
out = "";
# handle explicit table tag?
if table_open_tag:
out += table_open_tag + "\n";
open_tag = table_open_tag + "\n";
else:
# otherwise, add a default one:
out += "<table>" + "\n";
open_tag = "<table>" + "\n";
inner = "";
state = State(section_tag = "thead", default_cell_tag = "th");
@ -379,7 +390,10 @@ def do_table(table_open_tag, header_lines, seperator_line, body_lines, optional_
for line in header_lines:
html_table_line = do_table_line(state, line);
out += html_table_line + "\n";
inner += html_table_line;
if state.already_opened_section:
inner += "</thead>" "\n";
# Handle line seperator:
column_info = parse_colinfo(seperator_line);
@ -391,20 +405,22 @@ def do_table(table_open_tag, header_lines, seperator_line, body_lines, optional_
column_info = column_info);
for line in lines:
html_table_line = do_table_line(state, line);
inner += do_table_line(state, line);
out += html_table_line + "\n";
if state.already_opened_section:
inner += "</tbody>" "\n";
# Consider the optional caption
# Consider the optional caption.
# If it happens, it goes before everything else
if optional_caption:
out += f"<caption> {optional_caption} </caption>\n";
inner = f"<caption> {optional_caption} </caption>\n" + inner;
out += "</table>\n";
close_tag = "</table>\n";
for o in out.split("\n"):
for o in inner.split("\n"):
print(o);
return "\n\n" + out + "\n\n";
return "\n\n" + open_tag + inner + close_tag + "\n\n";
def handle_table(m):
print("handle_table");
@ -473,7 +489,7 @@ for o, c in product((1, 0), repeat=2):
[\n]{{2}}
# optional or required open table tag:
(?:(<table(?:[\s]+[^<>]*)?>) \n){{{o},1}}
(?:(<table[\s]+[^<>]*markdown="1"[^<>]*>) \n){{{o},1}}
# zero or one or more header rows:
((?: {row} \n){{{1-o},}})