This commit is contained in:
Zander Thannhauser 2025-12-07 15:43:36 -06:00
parent 4d6169ff59
commit dcbf8db706
3 changed files with 219 additions and 71 deletions

View file

@ -28,6 +28,7 @@
gedit
python3
meld
zip
];
};
});

46
test.md
View file

@ -30,6 +30,52 @@ Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed auctor, nunc non fr
<table markdown=1>
tiptoe | 2
3 | 4
</table>
<table markdown=1>
tiptoe | 2
3 | 4
5 | 6
[duh]
</table>
<table markdown=1>
<tfoot>
1 | 2
<tbody>
3 | 4
<thead>
5 | 6
</table>
<table markdown=1>
1 | 2
3 | 4
5 | 6
[duh]
</table>
<caption> <ol> <li> 1 <li> 2 <li> 3 </ol> | foo
<caption>foo<thead> bar | baz
| -

243
test.py
View file

@ -67,6 +67,7 @@ def do_table_line(state, line):
I'm supposed to return the whole HTML.
Including the <tr> if needed.
"""
print("do_table_line");
print(f'line = "{line}"');
@ -74,7 +75,7 @@ def do_table_line(state, line):
passthrough_pattern = fr"(?:<{tags}(?:[\s]+[^>]*)?>|</{tags}>)";
start_tag_pattern = fr"<{state.section_tag}(?:[\s]+[^>]*)?>"
start_tag_pattern = fr"<(thead|tbody|tfoot)(?:[\s]+[^>]*)?>"
open_tr_pattern = r"<tr(?:[\s]+[^<>]*)?>[\s]*";
@ -84,7 +85,7 @@ def do_table_line(state, line):
caption_sentinel_pattern = "(?:" + \
'|'.join((cell_delimiter, passthrough_pattern, start_tag_pattern,
open_tr_pattern, open_caption_pattern)) + ")"
open_tr_pattern, open_caption_pattern, r'</?table.*')) + ")"
already_open_tr = 0;
@ -94,8 +95,13 @@ def do_table_line(state, line):
# Is it whatever our start tag is?
if (m := re.match(start_tag_pattern, line)):
# we'll pass this through, and remember that we don't need to do
# it ourselves
print("found our start tag");
# it ourselves. Also close the previous section if one is already open,
# and change "section_tag" to the tag we just matched.
print(f"found our start tag: '{m.group(0)}'");
# if we're already open, close whatever that was
if state.already_opened_section:
out += f'</{state.section_tag}>' + "\n";
out += m.group(0) + "\n";
@ -103,6 +109,7 @@ def do_table_line(state, line):
print(f'line = "{line}"');
state.section_tag = m.group(1);
state.already_opened_section = 1;
continue;
@ -225,7 +232,6 @@ def do_table_line(state, line):
line = line[1:];
print(f'line = "{line}"');
# elif (m := re.match(r"<([a-z]+)(?:[\s]+([^<>]*))?>", line)):
elif (m := re.match(r"<(th|td)(?:[\s]+([^<>]*))?>", line)):
print("found opening HTML tag");
@ -263,10 +269,10 @@ def do_table_line(state, line):
line = line[len(m.group(0)):];
print(f'line = "{line}"');
# elif (m := re.match(r"<([a-z]+)", line)):
# print("found HTML open, but it's incomplete? huh?! throwing!");
elif (m := re.match(r"<(th|td)", line)):
print("found HTML open, but it's incomplete? huh?! throwing!");
# raise SyntaxError("could not find '>' for HTML open tag");
raise SyntaxError("could not find '>' for HTML open tag");
elif column_index < len(state.column_info):
print("found nothing, defaulting to column info");
@ -328,10 +334,6 @@ def do_table_line(state, line):
line = line[len(m.group(0)):]
print(f'line = "{line}"');
print(f'depth = {depth}');
# elif (m := re.match(fr"</{tags}>", line)):
# # ignore the closers for table parts, no passthrough
# line = line[len(m.group(0)):]
# print(f'line = "{line}"');
elif (m := re.match("<[a-z]+(?:\\s+[^<>]*)?>", line)):
content += m.group(0);
line = line[len(m.group(0)):]
@ -373,83 +375,160 @@ def do_table_line(state, line):
return out;
def do_table(table_open_tag, header_lines, seperator_line, body_lines, optional_caption):
    """Render one parsed markdown table as an HTML table string.

    Args:
        table_open_tag: Explicit ``<table ...>`` open tag text, or a falsy
            value to emit a plain ``<table>``.
        header_lines: Header row lines; each is passed to ``do_table_line``
            with a ``thead``/``th`` state.
        seperator_line: The separator line; passed to ``parse_colinfo`` to
            obtain the column info used for the body rows.
        body_lines: A list of groups of body row lines. Each group is
            rendered with its own fresh ``tbody``/``td`` state.
        optional_caption: Caption text, or a falsy value for no caption.

    Returns:
        The open tag, optional caption, rendered rows and ``</table>``,
        wrapped in two leading and two trailing newlines.

    Raises:
        SyntaxError: Propagated from ``do_table_line``.
    """
    # Handle an explicit table tag, otherwise add a default one.
    open_tag = (table_open_tag if table_open_tag else "<table>") + "\n"

    parts = []

    # Process the header lines.
    state = State(section_tag="thead", default_cell_tag="th")
    for line in header_lines:
        parts.append(do_table_line(state, line))

    if state.already_opened_section:
        # do_table_line reassigns state.section_tag when a row carries an
        # explicit section tag, so close whatever is actually open rather
        # than assuming it is still <thead>.
        parts.append(f"</{state.section_tag}>" "\n")

    # Handle the separator line.
    column_info = parse_colinfo(seperator_line)

    # Process the body lines, one section per group.
    for lines in body_lines:
        state = State(section_tag="tbody",
                      default_cell_tag="td",
                      column_info=column_info)

        for line in lines:
            parts.append(do_table_line(state, line))

        if state.already_opened_section:
            # Same reasoning as for the header: close the tag that is open.
            parts.append(f"</{state.section_tag}>" "\n")

    inner = "".join(parts)

    # Consider the optional caption.
    # If it happens, it goes before everything else.
    if optional_caption:
        inner = f"<caption> {optional_caption} </caption>\n" + inner

    close_tag = "</table>\n"

    # Debug dump of the generated rows, one output line at a time.
    for o in inner.split("\n"):
        print(o)

    return "\n\n" + open_tag + inner + close_tag + "\n\n"
# def do_table(table_open_tag, header_lines, seperator_line, body_lines, optional_caption):
#
# # handle explicit table tag?
# if table_open_tag:
# open_tag = table_open_tag + "\n";
# else:
# # otherwise, add a default one:
# open_tag = "<table>" + "\n";
#
# inner = "";
#
# state = State(section_tag = "thead", default_cell_tag = "th");
#
# # Process the header lines:
# for line in header_lines:
# inner = do_table_line(state, line);
#
# if state.already_opened_section:
# inner += f"</{state.section_tag}>" "\n";
#
# # Handle line seperator:
# column_info = parse_colinfo(seperator_line);
#
# # Process the body lines:
# for lines in body_lines:
# state = State(section_tag = "tbody", \
# default_cell_tag = "td", \
# column_info = column_info);
#
# for line in lines:
# inner += do_table_line(state, line);
#
# if state.already_opened_section:
# inner += f"</{state.section_tag}>" "\n";
#
# # Consider the optional caption.
# # If it happens, it goes before everything else
# if optional_caption:
# inner = f"<caption> {optional_caption} </caption>\n" + inner;
#
# close_tag = "</table>\n";
#
# for o in inner.split("\n"):
# print(o);
#
# return "\n\n" + open_tag + inner + close_tag + "\n\n";
def handle_table(m):
    """Substitution callback that turns one matched markdown table into HTML.

    The match is expected to expose these groups: 1 = optional explicit
    ``<table ...>`` open tag, 2 = header lines, 3 = separator line,
    4 = body lines (groups separated by a blank line), 5 = optional caption.

    Args:
        m: The match object for one table.

    Returns:
        The rendered HTML table wrapped in two leading and two trailing
        newlines, or the original matched text unchanged if a row raised
        ``SyntaxError`` while being rendered.
    """
    print("handle_table")

    matched = m.group(0)

    print(f'matched = """{matched}"""')

    optional_table_open = m.group(1)

    header_lines = m.group(2).strip().split("\n")

    seperator_line = m.group(3)

    # Body rows come in groups separated by an empty line; each group
    # becomes its own section.
    body_lines = [group.strip().split("\n")
                  for group in m.group(4).strip().split("\n\n")]

    optional_caption = m.group(5)

    assert seperator_line is not None

    try:
        # Handle an explicit table tag, otherwise add a default one.
        open_tag = (optional_table_open if optional_table_open else "<table>") + "\n"

        parts = []

        # Process the header lines. Every rendered row is kept; assigning
        # instead of accumulating here would drop all but the last row.
        state = State(section_tag="thead", default_cell_tag="th")
        for line in header_lines:
            parts.append(do_table_line(state, line))

        if state.already_opened_section:
            # do_table_line may have switched section_tag to an explicit
            # section tag found in a row, so close whatever is open now.
            parts.append(f"</{state.section_tag}>" "\n")

        # Handle the separator line.
        column_info = parse_colinfo(seperator_line)

        # Process the body lines, one section per group.
        for lines in body_lines:
            state = State(section_tag="tbody",
                          default_cell_tag="td",
                          column_info=column_info)

            for line in lines:
                parts.append(do_table_line(state, line))

            if state.already_opened_section:
                parts.append(f"</{state.section_tag}>" "\n")

        inner = "".join(parts)

        # Consider the optional caption.
        # If it happens, it goes before everything else.
        if optional_caption:
            inner = f"<caption> {optional_caption} </caption>\n" + inner

        close_tag = "</table>\n"

        return "\n\n" + open_tag + inner + close_tag + "\n\n"
    except SyntaxError as e:
        # A malformed row aborts this table only: leave its source text
        # untouched so the remaining tables can still be processed.
        print(f"caught syntax error: {e}")
        print("moving on to next table...")
        return m.group(0)
def handle_table_no_sep(m):
print("handle_table_no_sep");
matched = m.group(0);
print(f'matched = """{matched}"""');
table_open_tag = m.group(1) + "\n";
one_or_more_body_lines = m.group(2);
body_lines = [e.strip().split("\n") for e in one_or_more_body_lines.strip().split("\n\n")]
optional_caption = m.group(3);
try:
inner = "";
# Process the body lines:
for lines in body_lines:
state = State(section_tag = "tbody", \
default_cell_tag = "td", \
column_info = []);
for line in lines:
inner += do_table_line(state, line);
if state.already_opened_section:
inner += f"</{state.section_tag}>" "\n";
# Consider the optional caption.
# If it happens, it goes before everything else
if optional_caption:
inner = f"<caption> {optional_caption} </caption>\n" + inner;
table_close_tag = "</table>\n";
return "\n\n" + table_open_tag + inner + table_close_tag + "\n\n";
except SyntaxError as e:
print(f"caught syntax error: {e}");
print("moving on to next table...");
@ -489,7 +568,7 @@ for o, c in product((1, 0), repeat=2):
[\n]{{2}}
# optional or required open table tag:
(?:(<table[\s]+[^<>]*markdown="1"[^<>]*>) \n){{{o},1}}
(?:(<table[\s]+[^<>]*markdown=(?:"1"|'1'|1)[^<>]*>) \n){{{o},1}}
# zero or one or more header rows:
((?: {row} \n){{{1-o},}})
@ -512,6 +591,28 @@ for o, c in product((1, 0), repeat=2):
text = re.sub(table, handle_table, text, flags=re.VERBOSE)
table = fr"""
# two blank lines:
[\n]{{2}}
# required open table tag:
(?:(<table[\s]+[^<>]*markdown=(?:"1"|'1'|1)[^<>]*>) \n)
# one or more body rows, with empty lines of one:
((?: {row} [\n]{{1,2}})+)
# optional caption:
(?: \[ ([a-z0-9 "']+) \] \n)?
# required close table tag:
(?: </table> [\n])
# two blank lines:
[\n]{{2}}
""";
text = re.sub(table, handle_table_no_sep, text, flags=re.VERBOSE)
text += """
<style>
table