.
This commit is contained in:
parent
4d6169ff59
commit
dcbf8db706
3 changed files with 219 additions and 71 deletions
|
|
@ -28,6 +28,7 @@
|
||||||
gedit
|
gedit
|
||||||
python3
|
python3
|
||||||
meld
|
meld
|
||||||
|
zip
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
|
|
|
||||||
46
test.md
46
test.md
|
|
@ -30,6 +30,52 @@ Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed auctor, nunc non fr
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<table markdown=1>
|
||||||
|
tiptoe | 2
|
||||||
|
|
||||||
|
3 | 4
|
||||||
|
</table>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<table markdown=1>
|
||||||
|
tiptoe | 2
|
||||||
|
3 | 4
|
||||||
|
5 | 6
|
||||||
|
[duh]
|
||||||
|
</table>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<table markdown=1>
|
||||||
|
<tfoot>
|
||||||
|
1 | 2
|
||||||
|
<tbody>
|
||||||
|
3 | 4
|
||||||
|
<thead>
|
||||||
|
5 | 6
|
||||||
|
</table>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<table markdown=1>
|
||||||
|
1 | 2
|
||||||
|
|
||||||
|
3 | 4
|
||||||
|
|
||||||
|
5 | 6
|
||||||
|
[duh]
|
||||||
|
</table>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<caption> <ol> <li> 1 <li> 2 <li> 3 </ol> | foo
|
<caption> <ol> <li> 1 <li> 2 <li> 3 </ol> | foo
|
||||||
<caption>foo<thead> bar | baz
|
<caption>foo<thead> bar | baz
|
||||||
| -
|
| -
|
||||||
|
|
|
||||||
243
test.py
243
test.py
|
|
@ -67,6 +67,7 @@ def do_table_line(state, line):
|
||||||
I'm supposed to return the whole HTML.
|
I'm supposed to return the whole HTML.
|
||||||
Including the <tr> if needed.
|
Including the <tr> if needed.
|
||||||
"""
|
"""
|
||||||
|
print("do_table_line");
|
||||||
|
|
||||||
print(f'line = "{line}"');
|
print(f'line = "{line}"');
|
||||||
|
|
||||||
|
|
@ -74,7 +75,7 @@ def do_table_line(state, line):
|
||||||
|
|
||||||
passthrough_pattern = fr"(?:<{tags}(?:[\s]+[^>]*)?>|</{tags}>)";
|
passthrough_pattern = fr"(?:<{tags}(?:[\s]+[^>]*)?>|</{tags}>)";
|
||||||
|
|
||||||
start_tag_pattern = fr"<{state.section_tag}(?:[\s]+[^>]*)?>"
|
start_tag_pattern = fr"<(thead|tbody|tfoot)(?:[\s]+[^>]*)?>"
|
||||||
|
|
||||||
open_tr_pattern = r"<tr(?:[\s]+[^<>]*)?>[\s]*";
|
open_tr_pattern = r"<tr(?:[\s]+[^<>]*)?>[\s]*";
|
||||||
|
|
||||||
|
|
@ -84,7 +85,7 @@ def do_table_line(state, line):
|
||||||
|
|
||||||
caption_sentinel_pattern = "(?:" + \
|
caption_sentinel_pattern = "(?:" + \
|
||||||
'|'.join((cell_delimiter, passthrough_pattern, start_tag_pattern,
|
'|'.join((cell_delimiter, passthrough_pattern, start_tag_pattern,
|
||||||
open_tr_pattern, open_caption_pattern)) + ")"
|
open_tr_pattern, open_caption_pattern, r'</?table.*')) + ")"
|
||||||
|
|
||||||
already_open_tr = 0;
|
already_open_tr = 0;
|
||||||
|
|
||||||
|
|
@ -94,8 +95,13 @@ def do_table_line(state, line):
|
||||||
# Is it whatever our start tag is?
|
# Is it whatever our start tag is?
|
||||||
if (m := re.match(start_tag_pattern, line)):
|
if (m := re.match(start_tag_pattern, line)):
|
||||||
# we'll pass this through, and remember that we don't need to do
|
# we'll pass this through, and remember that we don't need to do
|
||||||
# it ourselves
|
# it ourselves also possibly close previous section also change
|
||||||
print("found our start tag");
|
# "section_tag" to be thead
|
||||||
|
print(f"found our start tag: '{m.group(0)}'");
|
||||||
|
|
||||||
|
# if we're already open, close whatever that was
|
||||||
|
if state.already_opened_section:
|
||||||
|
out += f'</{state.section_tag}>' + "\n";
|
||||||
|
|
||||||
out += m.group(0) + "\n";
|
out += m.group(0) + "\n";
|
||||||
|
|
||||||
|
|
@ -103,6 +109,7 @@ def do_table_line(state, line):
|
||||||
|
|
||||||
print(f'line = "{line}"');
|
print(f'line = "{line}"');
|
||||||
|
|
||||||
|
state.section_tag = m.group(1);
|
||||||
state.already_opened_section = 1;
|
state.already_opened_section = 1;
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
|
|
@ -225,7 +232,6 @@ def do_table_line(state, line):
|
||||||
line = line[1:];
|
line = line[1:];
|
||||||
|
|
||||||
print(f'line = "{line}"');
|
print(f'line = "{line}"');
|
||||||
# elif (m := re.match(r"<([a-z]+)(?:[\s]+([^<>]*))?>", line)):
|
|
||||||
elif (m := re.match(r"<(th|td)(?:[\s]+([^<>]*))?>", line)):
|
elif (m := re.match(r"<(th|td)(?:[\s]+([^<>]*))?>", line)):
|
||||||
print("found opening HTML tag");
|
print("found opening HTML tag");
|
||||||
|
|
||||||
|
|
@ -263,10 +269,10 @@ def do_table_line(state, line):
|
||||||
line = line[len(m.group(0)):];
|
line = line[len(m.group(0)):];
|
||||||
|
|
||||||
print(f'line = "{line}"');
|
print(f'line = "{line}"');
|
||||||
# elif (m := re.match(r"<([a-z]+)", line)):
|
elif (m := re.match(r"<(th|td)", line)):
|
||||||
# print("found HTML open, but it's incomplete? huh?! throwing!");
|
print("found HTML open, but it's incomplete? huh?! throwing!");
|
||||||
|
|
||||||
# raise SyntaxError("could not find '>' for HTML open tag");
|
raise SyntaxError("could not find '>' for HTML open tag");
|
||||||
elif column_index < len(state.column_info):
|
elif column_index < len(state.column_info):
|
||||||
print("found nothing, defaulting to column info");
|
print("found nothing, defaulting to column info");
|
||||||
|
|
||||||
|
|
@ -328,10 +334,6 @@ def do_table_line(state, line):
|
||||||
line = line[len(m.group(0)):]
|
line = line[len(m.group(0)):]
|
||||||
print(f'line = "{line}"');
|
print(f'line = "{line}"');
|
||||||
print(f'depth = {depth}');
|
print(f'depth = {depth}');
|
||||||
# elif (m := re.match(fr"</{tags}>", line)):
|
|
||||||
# # ignore the closers for table parts, no passthrough
|
|
||||||
# line = line[len(m.group(0)):]
|
|
||||||
# print(f'line = "{line}"');
|
|
||||||
elif (m := re.match("<[a-z]+(?:\\s+[^<>]*)?>", line)):
|
elif (m := re.match("<[a-z]+(?:\\s+[^<>]*)?>", line)):
|
||||||
content += m.group(0);
|
content += m.group(0);
|
||||||
line = line[len(m.group(0)):]
|
line = line[len(m.group(0)):]
|
||||||
|
|
@ -373,83 +375,160 @@ def do_table_line(state, line):
|
||||||
|
|
||||||
return out;
|
return out;
|
||||||
|
|
||||||
def do_table(table_open_tag, header_lines, seperator_line, body_lines, optional_caption):
|
# def do_table(table_open_tag, header_lines, seperator_line, body_lines, optional_caption):
|
||||||
|
#
|
||||||
# handle explicit table tag?
|
# # handle explicit table tag?
|
||||||
if table_open_tag:
|
# if table_open_tag:
|
||||||
open_tag = table_open_tag + "\n";
|
# open_tag = table_open_tag + "\n";
|
||||||
else:
|
# else:
|
||||||
# otherwise, add a default one:
|
# # otherwise, add a default one:
|
||||||
open_tag = "<table>" + "\n";
|
# open_tag = "<table>" + "\n";
|
||||||
|
#
|
||||||
inner = "";
|
# inner = "";
|
||||||
|
#
|
||||||
state = State(section_tag = "thead", default_cell_tag = "th");
|
# state = State(section_tag = "thead", default_cell_tag = "th");
|
||||||
|
#
|
||||||
# Process the header lines:
|
# # Process the header lines:
|
||||||
for line in header_lines:
|
# for line in header_lines:
|
||||||
html_table_line = do_table_line(state, line);
|
# inner = do_table_line(state, line);
|
||||||
|
#
|
||||||
inner += html_table_line;
|
# if state.already_opened_section:
|
||||||
|
# inner += f"</{state.section_tag}>" "\n";
|
||||||
if state.already_opened_section:
|
#
|
||||||
inner += "</thead>" "\n";
|
# # Handle line seperator:
|
||||||
|
# column_info = parse_colinfo(seperator_line);
|
||||||
# Handle line seperator:
|
#
|
||||||
column_info = parse_colinfo(seperator_line);
|
# # Process the body lines:
|
||||||
|
# for lines in body_lines:
|
||||||
# Process the body lines:
|
# state = State(section_tag = "tbody", \
|
||||||
for lines in body_lines:
|
# default_cell_tag = "td", \
|
||||||
state = State(section_tag = "tbody", \
|
# column_info = column_info);
|
||||||
default_cell_tag = "td", \
|
#
|
||||||
column_info = column_info);
|
# for line in lines:
|
||||||
|
# inner += do_table_line(state, line);
|
||||||
for line in lines:
|
#
|
||||||
inner += do_table_line(state, line);
|
# if state.already_opened_section:
|
||||||
|
# inner += f"</{state.section_tag}>" "\n";
|
||||||
if state.already_opened_section:
|
#
|
||||||
inner += "</tbody>" "\n";
|
# # Consider the optional caption.
|
||||||
|
# # If it happens, it goes before everything else
|
||||||
# Consider the optional caption.
|
# if optional_caption:
|
||||||
# If it happens, it goes before everything else
|
# inner = f"<caption> {optional_caption} </caption>\n" + inner;
|
||||||
if optional_caption:
|
#
|
||||||
inner = f"<caption> {optional_caption} </caption>\n" + inner;
|
# close_tag = "</table>\n";
|
||||||
|
#
|
||||||
close_tag = "</table>\n";
|
# for o in inner.split("\n"):
|
||||||
|
# print(o);
|
||||||
for o in inner.split("\n"):
|
#
|
||||||
print(o);
|
# return "\n\n" + open_tag + inner + close_tag + "\n\n";
|
||||||
|
|
||||||
return "\n\n" + open_tag + inner + close_tag + "\n\n";
|
|
||||||
|
|
||||||
def handle_table(m):
|
def handle_table(m):
|
||||||
print("handle_table");
|
print("handle_table");
|
||||||
|
|
||||||
matched = m.group(0);
|
matched = m.group(0);
|
||||||
|
|
||||||
print(f'matched = """{matched}"""');
|
|
||||||
|
|
||||||
optional_table_open = m.group(1);
|
optional_table_open = m.group(1);
|
||||||
|
|
||||||
one_or_more_header_lines = m.group(2);
|
one_or_more_header_lines = m.group(2);
|
||||||
|
|
||||||
|
header_lines = one_or_more_header_lines.strip().split("\n")
|
||||||
|
|
||||||
seperator_line = m.group(3);
|
seperator_line = m.group(3);
|
||||||
|
|
||||||
one_or_more_body_lines = m.group(4);
|
one_or_more_body_lines = m.group(4);
|
||||||
|
|
||||||
|
body_lines = [e.strip().split("\n") for e in one_or_more_body_lines.strip().split("\n\n")]
|
||||||
|
|
||||||
optional_caption = m.group(5);
|
optional_caption = m.group(5);
|
||||||
|
|
||||||
assert(seperator_line is not None)
|
assert(seperator_line is not None)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return do_table(
|
# handle explicit table tag?
|
||||||
optional_table_open,
|
if optional_table_open:
|
||||||
one_or_more_header_lines.strip().split("\n"),
|
open_tag = optional_table_open + "\n";
|
||||||
seperator_line,
|
else:
|
||||||
[e.strip().split("\n")
|
# otherwise, add a default one:
|
||||||
for e in one_or_more_body_lines.strip().split("\n\n")],
|
open_tag = "<table>" + "\n";
|
||||||
optional_caption,
|
|
||||||
);
|
inner = "";
|
||||||
|
|
||||||
|
state = State(section_tag = "thead", default_cell_tag = "th");
|
||||||
|
|
||||||
|
# Process the header lines:
|
||||||
|
for line in header_lines:
|
||||||
|
inner = do_table_line(state, line);
|
||||||
|
|
||||||
|
if state.already_opened_section:
|
||||||
|
inner += f"</{state.section_tag}>" "\n";
|
||||||
|
|
||||||
|
# Handle line seperator:
|
||||||
|
column_info = parse_colinfo(seperator_line);
|
||||||
|
|
||||||
|
# Process the body lines:
|
||||||
|
for lines in body_lines:
|
||||||
|
state = State(section_tag = "tbody", \
|
||||||
|
default_cell_tag = "td", \
|
||||||
|
column_info = column_info);
|
||||||
|
|
||||||
|
for line in lines:
|
||||||
|
inner += do_table_line(state, line);
|
||||||
|
|
||||||
|
if state.already_opened_section:
|
||||||
|
inner += f"</{state.section_tag}>" "\n";
|
||||||
|
|
||||||
|
# Consider the optional caption.
|
||||||
|
# If it happens, it goes before everything else
|
||||||
|
if optional_caption:
|
||||||
|
inner = f"<caption> {optional_caption} </caption>\n" + inner;
|
||||||
|
|
||||||
|
close_tag = "</table>\n";
|
||||||
|
|
||||||
|
return "\n\n" + open_tag + inner + close_tag + "\n\n";
|
||||||
|
except SyntaxError as e:
|
||||||
|
print(f"caught syntax error: {e}");
|
||||||
|
print("moving on to next table...");
|
||||||
|
return m.group(0);
|
||||||
|
|
||||||
|
def handle_table_no_sep(m):
|
||||||
|
print("handle_table_no_sep");
|
||||||
|
|
||||||
|
matched = m.group(0);
|
||||||
|
|
||||||
|
print(f'matched = """{matched}"""');
|
||||||
|
|
||||||
|
table_open_tag = m.group(1) + "\n";
|
||||||
|
|
||||||
|
one_or_more_body_lines = m.group(2);
|
||||||
|
|
||||||
|
body_lines = [e.strip().split("\n") for e in one_or_more_body_lines.strip().split("\n\n")]
|
||||||
|
|
||||||
|
optional_caption = m.group(3);
|
||||||
|
|
||||||
|
try:
|
||||||
|
inner = "";
|
||||||
|
|
||||||
|
# Process the body lines:
|
||||||
|
for lines in body_lines:
|
||||||
|
state = State(section_tag = "tbody", \
|
||||||
|
default_cell_tag = "td", \
|
||||||
|
column_info = []);
|
||||||
|
|
||||||
|
for line in lines:
|
||||||
|
inner += do_table_line(state, line);
|
||||||
|
|
||||||
|
if state.already_opened_section:
|
||||||
|
inner += f"</{state.section_tag}>" "\n";
|
||||||
|
|
||||||
|
# Consider the optional caption.
|
||||||
|
# If it happens, it goes before everything else
|
||||||
|
if optional_caption:
|
||||||
|
inner = f"<caption> {optional_caption} </caption>\n" + inner;
|
||||||
|
|
||||||
|
table_close_tag = "</table>\n";
|
||||||
|
|
||||||
|
return "\n\n" + table_open_tag + inner + table_close_tag + "\n\n";
|
||||||
|
|
||||||
except SyntaxError as e:
|
except SyntaxError as e:
|
||||||
print(f"caught syntax error: {e}");
|
print(f"caught syntax error: {e}");
|
||||||
print("moving on to next table...");
|
print("moving on to next table...");
|
||||||
|
|
@ -489,7 +568,7 @@ for o, c in product((1, 0), repeat=2):
|
||||||
[\n]{{2}}
|
[\n]{{2}}
|
||||||
|
|
||||||
# optional or required open table tag:
|
# optional or required open table tag:
|
||||||
(?:(<table[\s]+[^<>]*markdown="1"[^<>]*>) \n){{{o},1}}
|
(?:(<table[\s]+[^<>]*markdown=(?:"1"|'1'|1)[^<>]*>) \n){{{o},1}}
|
||||||
|
|
||||||
# zero or one or more header rows:
|
# zero or one or more header rows:
|
||||||
((?: {row} \n){{{1-o},}})
|
((?: {row} \n){{{1-o},}})
|
||||||
|
|
@ -512,6 +591,28 @@ for o, c in product((1, 0), repeat=2):
|
||||||
|
|
||||||
text = re.sub(table, handle_table, text, flags=re.VERBOSE)
|
text = re.sub(table, handle_table, text, flags=re.VERBOSE)
|
||||||
|
|
||||||
|
table = fr"""
|
||||||
|
# two blank lines:
|
||||||
|
[\n]{{2}}
|
||||||
|
|
||||||
|
# required open table tag:
|
||||||
|
(?:(<table[\s]+[^<>]*markdown=(?:"1"|'1'|1)[^<>]*>) \n)
|
||||||
|
|
||||||
|
# one or more body rows, with empty lines of one:
|
||||||
|
((?: {row} [\n]{{1,2}})+)
|
||||||
|
|
||||||
|
# optional caption:
|
||||||
|
(?: \[ ([a-z0-9 "']+) \] \n)?
|
||||||
|
|
||||||
|
# required close table tag:
|
||||||
|
(?: </table> [\n])
|
||||||
|
|
||||||
|
# two blank lines:
|
||||||
|
[\n]{{2}}
|
||||||
|
""";
|
||||||
|
|
||||||
|
text = re.sub(table, handle_table_no_sep, text, flags=re.VERBOSE)
|
||||||
|
|
||||||
text += """
|
text += """
|
||||||
<style>
|
<style>
|
||||||
table
|
table
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue