settling in
Some checks failed
CI / Linting - PHP 7.4 (push) Has been cancelled
CI / Linting - PHP 8.0 (push) Has been cancelled
CI / Linting - PHP 8.1 (push) Has been cancelled
CI / Linting - PHP 8.2 (push) Has been cancelled

This commit is contained in:
Zander Thannhauser 2025-12-14 12:11:56 -06:00
parent bec3dca171
commit f487d02f1c
2 changed files with 286 additions and 61 deletions

13
test.md
View file

@ -6,6 +6,15 @@ Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed auctor, nunc non fr
line 9 | 2
- | -
3 | 4
xyz
@ -76,7 +85,7 @@ tiptoe | 2
<caption> <ol> <li> 1 <li> 2 <li> 3 </ol> | foo
<caption> <ol> <li> 1 <li> 2 <li> 3 </ol> | foo88
<caption>foo<thead> bar | baz
| -
| bar
@ -105,7 +114,7 @@ Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed auctor, nunc non fr
<table markdown="1">
|a
|a117
|-
</table>

326
test.py
View file

@ -424,39 +424,152 @@ def do_table_line(state, line):
def handle_table(m):
print("handle_table");
assert(not "TODO");
# matched = m.group(0);
# optional_table_open = m.group(1);
# one_or_more_header_lines = m.group(2);
# header_lines = one_or_more_header_lines.strip().split("\n")
# seperator_line = m.group(3);
# one_or_more_body_lines = m.group(4);
# body_lines = [e.strip().split("\n") for e in one_or_more_body_lines.strip().split("\n\n")]
# optional_caption = m.group(5);
# assert(seperator_line is not None)
# try:
# # handle explicit table tag?
# if optional_table_open:
# open_tag = optional_table_open + "\n";
# else:
# # otherwise, add a default one:
# open_tag = "<table>" + "\n";
# inner = "";
# state = State(section_tag = "thead", default_cell_tag = "th");
# # Process the header lines:
# for line in header_lines:
# inner = do_table_line(state, line);
# if state.already_opened_section:
# inner += f"</{state.section_tag}>" "\n";
# # Handle line seperator:
# column_info = parse_colinfo(seperator_line);
# # Process the body lines:
# for lines in body_lines:
# state = State(section_tag = "tbody", \
# default_cell_tag = "td", \
# column_info = column_info);
# for line in lines:
# inner += do_table_line(state, line);
# if state.already_opened_section:
# inner += f"</{state.section_tag}>" "\n";
# # Consider the optional caption.
# # If it happens, it goes before everything else
# if optional_caption:
# inner = f"<caption> {optional_caption} </caption>\n" + inner;
# close_tag = "</table>\n";
# return "\n\n" + open_tag + inner + close_tag + "\n\n";
# except SyntaxError as e:
# print(f"caught syntax error: {e}");
# print("moving on to next table...");
# return m.group(0);
def handle_table_no_sep(m):
print("handle_table_no_sep");
assert(not "TODO");
# matched = m.group(0);
# print(f'matched = """{matched}"""');
# table_open_tag = m.group(1) + "\n";
# one_or_more_body_lines = m.group(2);
# body_lines = [e.strip().split("\n") for e in one_or_more_body_lines.strip().split("\n\n")]
# optional_caption = m.group(3);
# try:
# inner = "";
# # Process the body lines:
# for lines in body_lines:
# state = State(section_tag = "tbody", \
# default_cell_tag = "td", \
# column_info = []);
# for line in lines:
# inner += do_table_line(state, line);
# if state.already_opened_section:
# inner += f"</{state.section_tag}>" "\n";
# # Consider the optional caption.
# # If it happens, it goes before everything else
# if optional_caption:
# inner = f"<caption> {optional_caption} </caption>\n" + inner;
# table_close_tag = "</table>\n";
# return "\n\n" + table_open_tag + inner + table_close_tag + "\n\n";
# except SyntaxError as e:
# print(f"caught syntax error: {e}");
# print("moving on to next table...");
# return m.group(0);
def handle_table_case_1(m):
print("handle_table_case_1");
matched = m.group(0);
optional_table_open = m.group(1);
print(f'matched = """{matched}"""');
one_or_more_header_lines = m.group(2);
# required open table tag:
table_open_tag = m.group(1);
header_lines = one_or_more_header_lines.strip().split("\n")
# remove the 'markdown="1"' syntax
table_open_tag = re.sub(r"markdown=(?:\"1\"|'1'|1)", "", table_open_tag);
# zero or more header rows:
header_rows = m.group(2);
# required seperator line:
seperator_line = m.group(3);
one_or_more_body_lines = m.group(4);
body_lines = [e.strip().split("\n") for e in one_or_more_body_lines.strip().split("\n\n")]
# zero or more body rows, with empty lines of one:
body_rows = m.group(4);
# optional caption:
optional_caption = m.group(5);
assert(seperator_line is not None)
try:
# handle explicit table tag?
if optional_table_open:
open_tag = optional_table_open + "\n";
else:
# otherwise, add a default one:
open_tag = "<table>" + "\n";
inner = "";
# Process the (optional) header lines:
if header_rows is not None:
state = State(section_tag = "thead", default_cell_tag = "th");
# Process the header lines:
for line in header_lines:
inner = do_table_line(state, line);
for line in header_rows.strip().split('\n'):
inner += do_table_line(state, line);
if state.already_opened_section:
inner += f"</{state.section_tag}>" "\n";
@ -464,13 +577,13 @@ def handle_table(m):
# Handle line seperator:
column_info = parse_colinfo(seperator_line);
# Process the body lines:
for lines in body_lines:
if body_rows is not None and body_rows.strip():
for body in body_rows.strip().split('\n\n'):
state = State(section_tag = "tbody", \
default_cell_tag = "td", \
column_info = column_info);
for line in lines:
for line in body.strip().split('\n'):
inner += do_table_line(state, line);
if state.already_opened_section:
@ -481,39 +594,63 @@ def handle_table(m):
if optional_caption:
inner = f"<caption> {optional_caption} </caption>\n" + inner;
close_tag = "</table>\n";
table_open_tag = table_open_tag + "\n";
table_close_tag = "</table>\n";
return "\n\n" + open_tag + inner + close_tag + "\n\n";
return "\n\n" + table_open_tag + inner + table_close_tag + "\n";
except SyntaxError as e:
print(f"caught syntax error: {e}");
print("moving on to next table...");
return m.group(0);
def handle_table_no_sep(m):
print("handle_table_no_sep");
def handle_table_case_2(m):
print("handle_table_case_2");
matched = m.group(0);
print(f'matched = """{matched}"""');
table_open_tag = m.group(1) + "\n";
# no open table tag:
one_or_more_body_lines = m.group(2);
# one or more header rows:
header_rows = m.group(1);
body_lines = [e.strip().split("\n") for e in one_or_more_body_lines.strip().split("\n\n")]
# line seperator:
seperator_line = m.group(2);
optional_caption = m.group(3);
print(f'seperator_line = "{seperator_line.strip()}"');
# one or more body rows, with empty lines of one:
body_rows = m.group(3);
print(f'body_rows = "{body_rows}"');
# optional caption:
optional_caption = m.group(4);
# no close table tag:
try:
inner = "";
# Process the body lines:
for lines in body_lines:
state = State(section_tag = "thead", default_cell_tag = "th");
# Process the required header lines:
for line in header_rows.strip().split('\n'):
inner += do_table_line(state, line);
if state.already_opened_section:
inner += f"</{state.section_tag}>" "\n";
# Handle line seperator:
column_info = parse_colinfo(seperator_line);
for body in body_rows.strip().split('\n\n'):
state = State(section_tag = "tbody", \
default_cell_tag = "td", \
column_info = []);
column_info = column_info);
for line in lines:
for line in body.strip().split('\n'):
inner += do_table_line(state, line);
if state.already_opened_section:
@ -524,10 +661,61 @@ def handle_table_no_sep(m):
if optional_caption:
inner = f"<caption> {optional_caption} </caption>\n" + inner;
table_open_tag = "<table>\n";
table_close_tag = "</table>\n";
return "\n\n" + table_open_tag + inner + table_close_tag + "\n\n";
return "\n\n" + table_open_tag + inner + table_close_tag + "\n";
except SyntaxError as e:
print(f"caught syntax error: {e}");
print("moving on to next table...");
return m.group(0);
def handle_table_case_3(m):
print("handle_table_case_3");
matched = m.group(0);
print(f'matched = """{matched}"""');
# required open table tag:
table_open_tag = m.group(1);
# remove the 'markdown="1"' syntax
table_open_tag = re.sub(r"markdown=(?:\"1\"|'1'|1)", "", table_open_tag);
# one or more body rows, with empty lines of one:
body_rows = m.group(2);
# no line seperator
# optional caption:
optional_caption = m.group(3);
# optional close table tag.
try:
inner = "";
for body in body_rows.strip().split('\n\n'):
state = State(section_tag = "tbody", \
default_cell_tag = "td", \
column_info = []);
for line in body.strip().split('\n'):
inner += do_table_line(state, line);
if state.already_opened_section:
inner += f"</{state.section_tag}>" "\n";
# Consider the optional caption.
# If it happens, it goes before everything else
if optional_caption:
inner = f"<caption> {optional_caption} </caption>\n" + inner;
table_open_tag = table_open_tag + "\n";
table_close_tag = "</table>\n";
return "\n\n" + table_open_tag + inner + table_close_tag + "\n";
except SyntaxError as e:
print(f"caught syntax error: {e}");
print("moving on to next table...");
@ -537,7 +725,7 @@ with open("test.md") as stream:
text = stream.read();
# delimiters between cells
delimiter = r"(?:[|]|<(?:tr|th)(?:[\s]+[^<>]*)?>)";
delimiter = r"(?:[|]|<(?:td|th)(?:[\s]+[^<>]*)?>)";
# A row is anything with at least one delimiter
row = fr"(?: .* {delimiter} .*)";
@ -560,35 +748,61 @@ row = fr"(?:{row} | {table_part}+)";
# Between the header rows and the body rows there is a line seperator.
seperator_line = r"\s* [|]? \s* [-=:]+ \s* (?: \s* [|] \s* [-=:]* \s* )* \s*"
# Regex for whole table:
for o, c in product((1, 0), repeat=2):
table = fr"""
table = fr"""
# two blank lines:
[\n]{{2}}
# optional or required open table tag:
(?:(<table[\s]+[^<>]*markdown=(?:"1"|'1'|1)[^<>]*>) \n){{{o},1}}
# required open table tag:
(?:(<table[\s]+[^<>]*markdown=(?:"1"|'1'|1)[^<>]*>) \n)
# zero or one or more header rows:
((?: {row} \n){{{1-o},}})
# zero or more header rows:
((?: {row} \n)+)
# line seperator:
({seperator_line}) [\n]
# required line seperator:
({seperator_line} [\n])
# zero or one or more body rows, with empty lines of one:
((?: {row} [\n]{{1,2}}){{{1-c},}})
# zero or more body rows, with empty lines of one:
((?: {row} [\n]{{1,2}})*)
# optional caption:
(?: \[ ([a-z0-9 "']+) \] \n)?
# optional or required close table tag:
(?: </table> [\n]){{{c},1}}
# optional close table tag:
(?: </table> [\n])?
# two blank lines (another newline already matched earlier)
[\n]{{1}}
""";
text = re.sub(table, handle_table_case_1, text, flags=re.VERBOSE)
table = fr"""
# two blank lines:
[\n]{{2}}
""";
text = re.sub(table, handle_table, text, flags=re.VERBOSE)
# no open table tag:
# (?:(<table[\s]+[^<>]*markdown=(?:"1"|'1'|1)[^<>]*>) \n)?
# one or more header rows:
((?: {row} \n)+)
# line seperator:
({seperator_line} [\n])
# one or more body rows, with empty lines of one:
((?: {row} [\n]{{1,2}})+)
# optional caption:
(?: \[ ([a-z0-9 "']+) \] \n)?
# no close table tag:
# (?: </table> [\n])?
# two blank lines (another newline already matched earlier)
[\n]{{1}}
""";
text = re.sub(table, handle_table_case_2, text, flags=re.VERBOSE)
table = fr"""
# two blank lines:
@ -600,17 +814,19 @@ table = fr"""
# one or more body rows, with empty lines of one:
((?: {row} [\n]{{1,2}})+)
# no line seperator
# optional caption:
(?: \[ ([a-z0-9 "']+) \] \n)?
# required close table tag:
(?: </table> [\n])
# optional close table tag:
(?: </table> [\n])?
# two blank lines:
[\n]{{2}}
# two blank lines (another newline already matched earlier)
[\n]{{1}}
""";
text = re.sub(table, handle_table_no_sep, text, flags=re.VERBOSE)
text = re.sub(table, handle_table_case_3, text, flags=re.VERBOSE)
text += """
<style>