settling in
This commit is contained in:
parent
bec3dca171
commit
f487d02f1c
2 changed files with 286 additions and 61 deletions
13
test.md
13
test.md
|
|
@ -6,6 +6,15 @@ Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed auctor, nunc non fr
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
line 9 | 2
|
||||||
|
- | -
|
||||||
|
3 | 4
|
||||||
|
|
||||||
|
|
||||||
|
xyz
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -76,7 +85,7 @@ tiptoe | 2
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<caption> <ol> <li> 1 <li> 2 <li> 3 </ol> | foo
|
<caption> <ol> <li> 1 <li> 2 <li> 3 </ol> | foo88
|
||||||
<caption>foo<thead> bar | baz
|
<caption>foo<thead> bar | baz
|
||||||
| -
|
| -
|
||||||
| bar
|
| bar
|
||||||
|
|
@ -105,7 +114,7 @@ Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed auctor, nunc non fr
|
||||||
|
|
||||||
|
|
||||||
<table markdown="1">
|
<table markdown="1">
|
||||||
|a
|
|a117
|
||||||
|-
|
|-
|
||||||
</table>
|
</table>
|
||||||
|
|
||||||
|
|
|
||||||
334
test.py
334
test.py
|
|
@ -424,39 +424,220 @@ def do_table_line(state, line):
|
||||||
def handle_table(m):
|
def handle_table(m):
|
||||||
print("handle_table");
|
print("handle_table");
|
||||||
|
|
||||||
|
assert(not "TODO");
|
||||||
|
# matched = m.group(0);
|
||||||
|
|
||||||
|
# optional_table_open = m.group(1);
|
||||||
|
|
||||||
|
# one_or_more_header_lines = m.group(2);
|
||||||
|
|
||||||
|
# header_lines = one_or_more_header_lines.strip().split("\n")
|
||||||
|
|
||||||
|
# seperator_line = m.group(3);
|
||||||
|
|
||||||
|
# one_or_more_body_lines = m.group(4);
|
||||||
|
|
||||||
|
# body_lines = [e.strip().split("\n") for e in one_or_more_body_lines.strip().split("\n\n")]
|
||||||
|
|
||||||
|
# optional_caption = m.group(5);
|
||||||
|
|
||||||
|
# assert(seperator_line is not None)
|
||||||
|
|
||||||
|
# try:
|
||||||
|
# # handle explicit table tag?
|
||||||
|
# if optional_table_open:
|
||||||
|
# open_tag = optional_table_open + "\n";
|
||||||
|
# else:
|
||||||
|
# # otherwise, add a default one:
|
||||||
|
# open_tag = "<table>" + "\n";
|
||||||
|
|
||||||
|
# inner = "";
|
||||||
|
|
||||||
|
# state = State(section_tag = "thead", default_cell_tag = "th");
|
||||||
|
|
||||||
|
# # Process the header lines:
|
||||||
|
# for line in header_lines:
|
||||||
|
# inner = do_table_line(state, line);
|
||||||
|
|
||||||
|
# if state.already_opened_section:
|
||||||
|
# inner += f"</{state.section_tag}>" "\n";
|
||||||
|
|
||||||
|
# # Handle line seperator:
|
||||||
|
# column_info = parse_colinfo(seperator_line);
|
||||||
|
|
||||||
|
# # Process the body lines:
|
||||||
|
# for lines in body_lines:
|
||||||
|
# state = State(section_tag = "tbody", \
|
||||||
|
# default_cell_tag = "td", \
|
||||||
|
# column_info = column_info);
|
||||||
|
|
||||||
|
# for line in lines:
|
||||||
|
# inner += do_table_line(state, line);
|
||||||
|
|
||||||
|
# if state.already_opened_section:
|
||||||
|
# inner += f"</{state.section_tag}>" "\n";
|
||||||
|
|
||||||
|
# # Consider the optional caption.
|
||||||
|
# # If it happens, it goes before everything else
|
||||||
|
# if optional_caption:
|
||||||
|
# inner = f"<caption> {optional_caption} </caption>\n" + inner;
|
||||||
|
|
||||||
|
# close_tag = "</table>\n";
|
||||||
|
|
||||||
|
# return "\n\n" + open_tag + inner + close_tag + "\n\n";
|
||||||
|
# except SyntaxError as e:
|
||||||
|
# print(f"caught syntax error: {e}");
|
||||||
|
# print("moving on to next table...");
|
||||||
|
# return m.group(0);
|
||||||
|
|
||||||
|
def handle_table_no_sep(m):
|
||||||
|
print("handle_table_no_sep");
|
||||||
|
|
||||||
|
assert(not "TODO");
|
||||||
|
|
||||||
|
# matched = m.group(0);
|
||||||
|
|
||||||
|
# print(f'matched = """{matched}"""');
|
||||||
|
|
||||||
|
# table_open_tag = m.group(1) + "\n";
|
||||||
|
|
||||||
|
# one_or_more_body_lines = m.group(2);
|
||||||
|
|
||||||
|
# body_lines = [e.strip().split("\n") for e in one_or_more_body_lines.strip().split("\n\n")]
|
||||||
|
|
||||||
|
# optional_caption = m.group(3);
|
||||||
|
|
||||||
|
# try:
|
||||||
|
# inner = "";
|
||||||
|
|
||||||
|
# # Process the body lines:
|
||||||
|
# for lines in body_lines:
|
||||||
|
# state = State(section_tag = "tbody", \
|
||||||
|
# default_cell_tag = "td", \
|
||||||
|
# column_info = []);
|
||||||
|
|
||||||
|
# for line in lines:
|
||||||
|
# inner += do_table_line(state, line);
|
||||||
|
|
||||||
|
# if state.already_opened_section:
|
||||||
|
# inner += f"</{state.section_tag}>" "\n";
|
||||||
|
|
||||||
|
# # Consider the optional caption.
|
||||||
|
# # If it happens, it goes before everything else
|
||||||
|
# if optional_caption:
|
||||||
|
# inner = f"<caption> {optional_caption} </caption>\n" + inner;
|
||||||
|
|
||||||
|
# table_close_tag = "</table>\n";
|
||||||
|
|
||||||
|
# return "\n\n" + table_open_tag + inner + table_close_tag + "\n\n";
|
||||||
|
|
||||||
|
# except SyntaxError as e:
|
||||||
|
# print(f"caught syntax error: {e}");
|
||||||
|
# print("moving on to next table...");
|
||||||
|
# return m.group(0);
|
||||||
|
|
||||||
|
def handle_table_case_1(m):
|
||||||
|
print("handle_table_case_1");
|
||||||
|
|
||||||
matched = m.group(0);
|
matched = m.group(0);
|
||||||
|
|
||||||
optional_table_open = m.group(1);
|
print(f'matched = """{matched}"""');
|
||||||
|
|
||||||
one_or_more_header_lines = m.group(2);
|
# required open table tag:
|
||||||
|
table_open_tag = m.group(1);
|
||||||
|
|
||||||
header_lines = one_or_more_header_lines.strip().split("\n")
|
# remove the 'markdown="1"' syntax
|
||||||
|
table_open_tag = re.sub(r"markdown=(?:\"1\"|'1'|1)", "", table_open_tag);
|
||||||
|
|
||||||
|
# zero or more header rows:
|
||||||
|
header_rows = m.group(2);
|
||||||
|
|
||||||
|
# required seperator line:
|
||||||
seperator_line = m.group(3);
|
seperator_line = m.group(3);
|
||||||
|
|
||||||
one_or_more_body_lines = m.group(4);
|
# zero or more body rows, with empty lines of one:
|
||||||
|
body_rows = m.group(4);
|
||||||
body_lines = [e.strip().split("\n") for e in one_or_more_body_lines.strip().split("\n\n")]
|
|
||||||
|
|
||||||
|
# optional caption:
|
||||||
optional_caption = m.group(5);
|
optional_caption = m.group(5);
|
||||||
|
|
||||||
assert(seperator_line is not None)
|
try:
|
||||||
|
inner = "";
|
||||||
|
|
||||||
|
# Process the (optional) header lines:
|
||||||
|
if header_rows is not None:
|
||||||
|
state = State(section_tag = "thead", default_cell_tag = "th");
|
||||||
|
|
||||||
|
for line in header_rows.strip().split('\n'):
|
||||||
|
inner += do_table_line(state, line);
|
||||||
|
|
||||||
|
if state.already_opened_section:
|
||||||
|
inner += f"</{state.section_tag}>" "\n";
|
||||||
|
|
||||||
|
# Handle line seperator:
|
||||||
|
column_info = parse_colinfo(seperator_line);
|
||||||
|
|
||||||
|
if body_rows is not None and body_rows.strip():
|
||||||
|
for body in body_rows.strip().split('\n\n'):
|
||||||
|
state = State(section_tag = "tbody", \
|
||||||
|
default_cell_tag = "td", \
|
||||||
|
column_info = column_info);
|
||||||
|
|
||||||
|
for line in body.strip().split('\n'):
|
||||||
|
inner += do_table_line(state, line);
|
||||||
|
|
||||||
|
if state.already_opened_section:
|
||||||
|
inner += f"</{state.section_tag}>" "\n";
|
||||||
|
|
||||||
|
# Consider the optional caption.
|
||||||
|
# If it happens, it goes before everything else
|
||||||
|
if optional_caption:
|
||||||
|
inner = f"<caption> {optional_caption} </caption>\n" + inner;
|
||||||
|
|
||||||
|
table_open_tag = table_open_tag + "\n";
|
||||||
|
table_close_tag = "</table>\n";
|
||||||
|
|
||||||
|
return "\n\n" + table_open_tag + inner + table_close_tag + "\n";
|
||||||
|
except SyntaxError as e:
|
||||||
|
print(f"caught syntax error: {e}");
|
||||||
|
print("moving on to next table...");
|
||||||
|
return m.group(0);
|
||||||
|
|
||||||
|
def handle_table_case_2(m):
|
||||||
|
print("handle_table_case_2");
|
||||||
|
|
||||||
|
matched = m.group(0);
|
||||||
|
|
||||||
|
print(f'matched = """{matched}"""');
|
||||||
|
|
||||||
|
# no open table tag:
|
||||||
|
|
||||||
|
# one or more header rows:
|
||||||
|
header_rows = m.group(1);
|
||||||
|
|
||||||
|
# line seperator:
|
||||||
|
seperator_line = m.group(2);
|
||||||
|
|
||||||
|
print(f'seperator_line = "{seperator_line.strip()}"');
|
||||||
|
|
||||||
|
# one or more body rows, with empty lines of one:
|
||||||
|
body_rows = m.group(3);
|
||||||
|
|
||||||
|
print(f'body_rows = "{body_rows}"');
|
||||||
|
|
||||||
|
# optional caption:
|
||||||
|
optional_caption = m.group(4);
|
||||||
|
|
||||||
|
# no close table tag:
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# handle explicit table tag?
|
|
||||||
if optional_table_open:
|
|
||||||
open_tag = optional_table_open + "\n";
|
|
||||||
else:
|
|
||||||
# otherwise, add a default one:
|
|
||||||
open_tag = "<table>" + "\n";
|
|
||||||
|
|
||||||
inner = "";
|
inner = "";
|
||||||
|
|
||||||
state = State(section_tag = "thead", default_cell_tag = "th");
|
state = State(section_tag = "thead", default_cell_tag = "th");
|
||||||
|
|
||||||
# Process the header lines:
|
# Process the required header lines:
|
||||||
for line in header_lines:
|
for line in header_rows.strip().split('\n'):
|
||||||
inner = do_table_line(state, line);
|
inner += do_table_line(state, line);
|
||||||
|
|
||||||
if state.already_opened_section:
|
if state.already_opened_section:
|
||||||
inner += f"</{state.section_tag}>" "\n";
|
inner += f"</{state.section_tag}>" "\n";
|
||||||
|
|
@ -464,13 +645,12 @@ def handle_table(m):
|
||||||
# Handle line seperator:
|
# Handle line seperator:
|
||||||
column_info = parse_colinfo(seperator_line);
|
column_info = parse_colinfo(seperator_line);
|
||||||
|
|
||||||
# Process the body lines:
|
for body in body_rows.strip().split('\n\n'):
|
||||||
for lines in body_lines:
|
|
||||||
state = State(section_tag = "tbody", \
|
state = State(section_tag = "tbody", \
|
||||||
default_cell_tag = "td", \
|
default_cell_tag = "td", \
|
||||||
column_info = column_info);
|
column_info = column_info);
|
||||||
|
|
||||||
for line in lines:
|
for line in body.strip().split('\n'):
|
||||||
inner += do_table_line(state, line);
|
inner += do_table_line(state, line);
|
||||||
|
|
||||||
if state.already_opened_section:
|
if state.already_opened_section:
|
||||||
|
|
@ -481,39 +661,47 @@ def handle_table(m):
|
||||||
if optional_caption:
|
if optional_caption:
|
||||||
inner = f"<caption> {optional_caption} </caption>\n" + inner;
|
inner = f"<caption> {optional_caption} </caption>\n" + inner;
|
||||||
|
|
||||||
close_tag = "</table>\n";
|
table_open_tag = "<table>\n";
|
||||||
|
table_close_tag = "</table>\n";
|
||||||
|
|
||||||
return "\n\n" + open_tag + inner + close_tag + "\n\n";
|
return "\n\n" + table_open_tag + inner + table_close_tag + "\n";
|
||||||
except SyntaxError as e:
|
except SyntaxError as e:
|
||||||
print(f"caught syntax error: {e}");
|
print(f"caught syntax error: {e}");
|
||||||
print("moving on to next table...");
|
print("moving on to next table...");
|
||||||
return m.group(0);
|
return m.group(0);
|
||||||
|
|
||||||
def handle_table_no_sep(m):
|
def handle_table_case_3(m):
|
||||||
print("handle_table_no_sep");
|
print("handle_table_case_3");
|
||||||
|
|
||||||
matched = m.group(0);
|
matched = m.group(0);
|
||||||
|
|
||||||
print(f'matched = """{matched}"""');
|
print(f'matched = """{matched}"""');
|
||||||
|
|
||||||
table_open_tag = m.group(1) + "\n";
|
# required open table tag:
|
||||||
|
table_open_tag = m.group(1);
|
||||||
|
|
||||||
one_or_more_body_lines = m.group(2);
|
# remove the 'markdown="1"' syntax
|
||||||
|
table_open_tag = re.sub(r"markdown=(?:\"1\"|'1'|1)", "", table_open_tag);
|
||||||
|
|
||||||
body_lines = [e.strip().split("\n") for e in one_or_more_body_lines.strip().split("\n\n")]
|
# one or more body rows, with empty lines of one:
|
||||||
|
body_rows = m.group(2);
|
||||||
|
|
||||||
|
# no line seperator
|
||||||
|
|
||||||
|
# optional caption:
|
||||||
optional_caption = m.group(3);
|
optional_caption = m.group(3);
|
||||||
|
|
||||||
|
# optional close table tag.
|
||||||
|
|
||||||
try:
|
try:
|
||||||
inner = "";
|
inner = "";
|
||||||
|
|
||||||
# Process the body lines:
|
for body in body_rows.strip().split('\n\n'):
|
||||||
for lines in body_lines:
|
|
||||||
state = State(section_tag = "tbody", \
|
state = State(section_tag = "tbody", \
|
||||||
default_cell_tag = "td", \
|
default_cell_tag = "td", \
|
||||||
column_info = []);
|
column_info = []);
|
||||||
|
|
||||||
for line in lines:
|
for line in body.strip().split('\n'):
|
||||||
inner += do_table_line(state, line);
|
inner += do_table_line(state, line);
|
||||||
|
|
||||||
if state.already_opened_section:
|
if state.already_opened_section:
|
||||||
|
|
@ -524,10 +712,10 @@ def handle_table_no_sep(m):
|
||||||
if optional_caption:
|
if optional_caption:
|
||||||
inner = f"<caption> {optional_caption} </caption>\n" + inner;
|
inner = f"<caption> {optional_caption} </caption>\n" + inner;
|
||||||
|
|
||||||
|
table_open_tag = table_open_tag + "\n";
|
||||||
table_close_tag = "</table>\n";
|
table_close_tag = "</table>\n";
|
||||||
|
|
||||||
return "\n\n" + table_open_tag + inner + table_close_tag + "\n\n";
|
return "\n\n" + table_open_tag + inner + table_close_tag + "\n";
|
||||||
|
|
||||||
except SyntaxError as e:
|
except SyntaxError as e:
|
||||||
print(f"caught syntax error: {e}");
|
print(f"caught syntax error: {e}");
|
||||||
print("moving on to next table...");
|
print("moving on to next table...");
|
||||||
|
|
@ -537,7 +725,7 @@ with open("test.md") as stream:
|
||||||
text = stream.read();
|
text = stream.read();
|
||||||
|
|
||||||
# delimiters between cells
|
# delimiters between cells
|
||||||
delimiter = r"(?:[|]|<(?:tr|th)(?:[\s]+[^<>]*)?>)";
|
delimiter = r"(?:[|]|<(?:td|th)(?:[\s]+[^<>]*)?>)";
|
||||||
|
|
||||||
# A row is anything with at least one delimiter
|
# A row is anything with at least one delimiter
|
||||||
row = fr"(?: .* {delimiter} .*)";
|
row = fr"(?: .* {delimiter} .*)";
|
||||||
|
|
@ -560,35 +748,61 @@ row = fr"(?:{row} | {table_part}+)";
|
||||||
# Between the header rows and the body rows there is a line seperator.
|
# Between the header rows and the body rows there is a line seperator.
|
||||||
seperator_line = r"\s* [|]? \s* [-=:]+ \s* (?: \s* [|] \s* [-=:]* \s* )* \s*"
|
seperator_line = r"\s* [|]? \s* [-=:]+ \s* (?: \s* [|] \s* [-=:]* \s* )* \s*"
|
||||||
|
|
||||||
# Regex for whole table:
|
table = fr"""
|
||||||
for o, c in product((1, 0), repeat=2):
|
# two blank lines:
|
||||||
table = fr"""
|
[\n]{{2}}
|
||||||
# two blank lines:
|
|
||||||
[\n]{{2}}
|
|
||||||
|
|
||||||
# optional or required open table tag:
|
# required open table tag:
|
||||||
(?:(<table[\s]+[^<>]*markdown=(?:"1"|'1'|1)[^<>]*>) \n){{{o},1}}
|
(?:(<table[\s]+[^<>]*markdown=(?:"1"|'1'|1)[^<>]*>) \n)
|
||||||
|
|
||||||
# zero or one or more header rows:
|
# zero or more header rows:
|
||||||
((?: {row} \n){{{1-o},}})
|
((?: {row} \n)+)
|
||||||
|
|
||||||
# line seperator:
|
# required line seperator:
|
||||||
({seperator_line}) [\n]
|
({seperator_line} [\n])
|
||||||
|
|
||||||
# zero or one or more body rows, with empty lines of one:
|
# zero or more body rows, with empty lines of one:
|
||||||
((?: {row} [\n]{{1,2}}){{{1-c},}})
|
((?: {row} [\n]{{1,2}})*)
|
||||||
|
|
||||||
# optional caption:
|
# optional caption:
|
||||||
(?: \[ ([a-z0-9 "']+) \] \n)?
|
(?: \[ ([a-z0-9 "']+) \] \n)?
|
||||||
|
|
||||||
# optional or required close table tag:
|
# optional close table tag:
|
||||||
(?: </table> [\n]){{{c},1}}
|
(?: </table> [\n])?
|
||||||
|
|
||||||
# two blank lines:
|
# two blank lines (another newline already matched earlier)
|
||||||
[\n]{{2}}
|
[\n]{{1}}
|
||||||
""";
|
""";
|
||||||
|
|
||||||
text = re.sub(table, handle_table, text, flags=re.VERBOSE)
|
text = re.sub(table, handle_table_case_1, text, flags=re.VERBOSE)
|
||||||
|
|
||||||
|
table = fr"""
|
||||||
|
# two blank lines:
|
||||||
|
[\n]{{2}}
|
||||||
|
|
||||||
|
# no open table tag:
|
||||||
|
# (?:(<table[\s]+[^<>]*markdown=(?:"1"|'1'|1)[^<>]*>) \n)?
|
||||||
|
|
||||||
|
# one or more header rows:
|
||||||
|
((?: {row} \n)+)
|
||||||
|
|
||||||
|
# line seperator:
|
||||||
|
({seperator_line} [\n])
|
||||||
|
|
||||||
|
# one or more body rows, with empty lines of one:
|
||||||
|
((?: {row} [\n]{{1,2}})+)
|
||||||
|
|
||||||
|
# optional caption:
|
||||||
|
(?: \[ ([a-z0-9 "']+) \] \n)?
|
||||||
|
|
||||||
|
# no close table tag:
|
||||||
|
# (?: </table> [\n])?
|
||||||
|
|
||||||
|
# two blank lines (another newline already matched earlier)
|
||||||
|
[\n]{{1}}
|
||||||
|
""";
|
||||||
|
|
||||||
|
text = re.sub(table, handle_table_case_2, text, flags=re.VERBOSE)
|
||||||
|
|
||||||
table = fr"""
|
table = fr"""
|
||||||
# two blank lines:
|
# two blank lines:
|
||||||
|
|
@ -600,17 +814,19 @@ table = fr"""
|
||||||
# one or more body rows, with empty lines of one:
|
# one or more body rows, with empty lines of one:
|
||||||
((?: {row} [\n]{{1,2}})+)
|
((?: {row} [\n]{{1,2}})+)
|
||||||
|
|
||||||
|
# no line seperator
|
||||||
|
|
||||||
# optional caption:
|
# optional caption:
|
||||||
(?: \[ ([a-z0-9 "']+) \] \n)?
|
(?: \[ ([a-z0-9 "']+) \] \n)?
|
||||||
|
|
||||||
# required close table tag:
|
# optional close table tag:
|
||||||
(?: </table> [\n])
|
(?: </table> [\n])?
|
||||||
|
|
||||||
# two blank lines:
|
# two blank lines (another newline already matched earlier)
|
||||||
[\n]{{2}}
|
[\n]{{1}}
|
||||||
""";
|
""";
|
||||||
|
|
||||||
text = re.sub(table, handle_table_no_sep, text, flags=re.VERBOSE)
|
text = re.sub(table, handle_table_case_3, text, flags=re.VERBOSE)
|
||||||
|
|
||||||
text += """
|
text += """
|
||||||
<style>
|
<style>
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue