settling in
This commit is contained in:
parent
bec3dca171
commit
f487d02f1c
2 changed files with 286 additions and 61 deletions
13
test.md
13
test.md
|
|
@ -6,6 +6,15 @@ Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed auctor, nunc non fr
|
|||
|
||||
|
||||
|
||||
line 9 | 2
|
||||
- | -
|
||||
3 | 4
|
||||
|
||||
|
||||
xyz
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -76,7 +85,7 @@ tiptoe | 2
|
|||
|
||||
|
||||
|
||||
<caption> <ol> <li> 1 <li> 2 <li> 3 </ol> | foo
|
||||
<caption> <ol> <li> 1 <li> 2 <li> 3 </ol> | foo88
|
||||
<caption>foo<thead> bar | baz
|
||||
| -
|
||||
| bar
|
||||
|
|
@ -105,7 +114,7 @@ Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed auctor, nunc non fr
|
|||
|
||||
|
||||
<table markdown="1">
|
||||
|a
|
||||
|a117
|
||||
|-
|
||||
</table>
|
||||
|
||||
|
|
|
|||
334
test.py
334
test.py
|
|
@ -424,39 +424,220 @@ def do_table_line(state, line):
|
|||
def handle_table(m):
|
||||
print("handle_table");
|
||||
|
||||
assert(not "TODO");
|
||||
# matched = m.group(0);
|
||||
|
||||
# optional_table_open = m.group(1);
|
||||
|
||||
# one_or_more_header_lines = m.group(2);
|
||||
|
||||
# header_lines = one_or_more_header_lines.strip().split("\n")
|
||||
|
||||
# seperator_line = m.group(3);
|
||||
|
||||
# one_or_more_body_lines = m.group(4);
|
||||
|
||||
# body_lines = [e.strip().split("\n") for e in one_or_more_body_lines.strip().split("\n\n")]
|
||||
|
||||
# optional_caption = m.group(5);
|
||||
|
||||
# assert(seperator_line is not None)
|
||||
|
||||
# try:
|
||||
# # handle explicit table tag?
|
||||
# if optional_table_open:
|
||||
# open_tag = optional_table_open + "\n";
|
||||
# else:
|
||||
# # otherwise, add a default one:
|
||||
# open_tag = "<table>" + "\n";
|
||||
|
||||
# inner = "";
|
||||
|
||||
# state = State(section_tag = "thead", default_cell_tag = "th");
|
||||
|
||||
# # Process the header lines:
|
||||
# for line in header_lines:
|
||||
# inner = do_table_line(state, line);
|
||||
|
||||
# if state.already_opened_section:
|
||||
# inner += f"</{state.section_tag}>" "\n";
|
||||
|
||||
# # Handle line seperator:
|
||||
# column_info = parse_colinfo(seperator_line);
|
||||
|
||||
# # Process the body lines:
|
||||
# for lines in body_lines:
|
||||
# state = State(section_tag = "tbody", \
|
||||
# default_cell_tag = "td", \
|
||||
# column_info = column_info);
|
||||
|
||||
# for line in lines:
|
||||
# inner += do_table_line(state, line);
|
||||
|
||||
# if state.already_opened_section:
|
||||
# inner += f"</{state.section_tag}>" "\n";
|
||||
|
||||
# # Consider the optional caption.
|
||||
# # If it happens, it goes before everything else
|
||||
# if optional_caption:
|
||||
# inner = f"<caption> {optional_caption} </caption>\n" + inner;
|
||||
|
||||
# close_tag = "</table>\n";
|
||||
|
||||
# return "\n\n" + open_tag + inner + close_tag + "\n\n";
|
||||
# except SyntaxError as e:
|
||||
# print(f"caught syntax error: {e}");
|
||||
# print("moving on to next table...");
|
||||
# return m.group(0);
|
||||
|
||||
def handle_table_no_sep(m):
|
||||
print("handle_table_no_sep");
|
||||
|
||||
assert(not "TODO");
|
||||
|
||||
# matched = m.group(0);
|
||||
|
||||
# print(f'matched = """{matched}"""');
|
||||
|
||||
# table_open_tag = m.group(1) + "\n";
|
||||
|
||||
# one_or_more_body_lines = m.group(2);
|
||||
|
||||
# body_lines = [e.strip().split("\n") for e in one_or_more_body_lines.strip().split("\n\n")]
|
||||
|
||||
# optional_caption = m.group(3);
|
||||
|
||||
# try:
|
||||
# inner = "";
|
||||
|
||||
# # Process the body lines:
|
||||
# for lines in body_lines:
|
||||
# state = State(section_tag = "tbody", \
|
||||
# default_cell_tag = "td", \
|
||||
# column_info = []);
|
||||
|
||||
# for line in lines:
|
||||
# inner += do_table_line(state, line);
|
||||
|
||||
# if state.already_opened_section:
|
||||
# inner += f"</{state.section_tag}>" "\n";
|
||||
|
||||
# # Consider the optional caption.
|
||||
# # If it happens, it goes before everything else
|
||||
# if optional_caption:
|
||||
# inner = f"<caption> {optional_caption} </caption>\n" + inner;
|
||||
|
||||
# table_close_tag = "</table>\n";
|
||||
|
||||
# return "\n\n" + table_open_tag + inner + table_close_tag + "\n\n";
|
||||
|
||||
# except SyntaxError as e:
|
||||
# print(f"caught syntax error: {e}");
|
||||
# print("moving on to next table...");
|
||||
# return m.group(0);
|
||||
|
||||
def handle_table_case_1(m):
|
||||
print("handle_table_case_1");
|
||||
|
||||
matched = m.group(0);
|
||||
|
||||
optional_table_open = m.group(1);
|
||||
print(f'matched = """{matched}"""');
|
||||
|
||||
one_or_more_header_lines = m.group(2);
|
||||
# required open table tag:
|
||||
table_open_tag = m.group(1);
|
||||
|
||||
header_lines = one_or_more_header_lines.strip().split("\n")
|
||||
# remove the 'markdown="1"' syntax
|
||||
table_open_tag = re.sub(r"markdown=(?:\"1\"|'1'|1)", "", table_open_tag);
|
||||
|
||||
# zero or more header rows:
|
||||
header_rows = m.group(2);
|
||||
|
||||
# required seperator line:
|
||||
seperator_line = m.group(3);
|
||||
|
||||
one_or_more_body_lines = m.group(4);
|
||||
|
||||
body_lines = [e.strip().split("\n") for e in one_or_more_body_lines.strip().split("\n\n")]
|
||||
# zero or more body rows, with empty lines of one:
|
||||
body_rows = m.group(4);
|
||||
|
||||
# optional caption:
|
||||
optional_caption = m.group(5);
|
||||
|
||||
assert(seperator_line is not None)
|
||||
try:
|
||||
inner = "";
|
||||
|
||||
# Process the (optional) header lines:
|
||||
if header_rows is not None:
|
||||
state = State(section_tag = "thead", default_cell_tag = "th");
|
||||
|
||||
for line in header_rows.strip().split('\n'):
|
||||
inner += do_table_line(state, line);
|
||||
|
||||
if state.already_opened_section:
|
||||
inner += f"</{state.section_tag}>" "\n";
|
||||
|
||||
# Handle line seperator:
|
||||
column_info = parse_colinfo(seperator_line);
|
||||
|
||||
if body_rows is not None and body_rows.strip():
|
||||
for body in body_rows.strip().split('\n\n'):
|
||||
state = State(section_tag = "tbody", \
|
||||
default_cell_tag = "td", \
|
||||
column_info = column_info);
|
||||
|
||||
for line in body.strip().split('\n'):
|
||||
inner += do_table_line(state, line);
|
||||
|
||||
if state.already_opened_section:
|
||||
inner += f"</{state.section_tag}>" "\n";
|
||||
|
||||
# Consider the optional caption.
|
||||
# If it happens, it goes before everything else
|
||||
if optional_caption:
|
||||
inner = f"<caption> {optional_caption} </caption>\n" + inner;
|
||||
|
||||
table_open_tag = table_open_tag + "\n";
|
||||
table_close_tag = "</table>\n";
|
||||
|
||||
return "\n\n" + table_open_tag + inner + table_close_tag + "\n";
|
||||
except SyntaxError as e:
|
||||
print(f"caught syntax error: {e}");
|
||||
print("moving on to next table...");
|
||||
return m.group(0);
|
||||
|
||||
def handle_table_case_2(m):
|
||||
print("handle_table_case_2");
|
||||
|
||||
matched = m.group(0);
|
||||
|
||||
print(f'matched = """{matched}"""');
|
||||
|
||||
# no open table tag:
|
||||
|
||||
# one or more header rows:
|
||||
header_rows = m.group(1);
|
||||
|
||||
# line seperator:
|
||||
seperator_line = m.group(2);
|
||||
|
||||
print(f'seperator_line = "{seperator_line.strip()}"');
|
||||
|
||||
# one or more body rows, with empty lines of one:
|
||||
body_rows = m.group(3);
|
||||
|
||||
print(f'body_rows = "{body_rows}"');
|
||||
|
||||
# optional caption:
|
||||
optional_caption = m.group(4);
|
||||
|
||||
# no close table tag:
|
||||
|
||||
try:
|
||||
# handle explicit table tag?
|
||||
if optional_table_open:
|
||||
open_tag = optional_table_open + "\n";
|
||||
else:
|
||||
# otherwise, add a default one:
|
||||
open_tag = "<table>" + "\n";
|
||||
|
||||
inner = "";
|
||||
|
||||
state = State(section_tag = "thead", default_cell_tag = "th");
|
||||
|
||||
# Process the header lines:
|
||||
for line in header_lines:
|
||||
inner = do_table_line(state, line);
|
||||
# Process the required header lines:
|
||||
for line in header_rows.strip().split('\n'):
|
||||
inner += do_table_line(state, line);
|
||||
|
||||
if state.already_opened_section:
|
||||
inner += f"</{state.section_tag}>" "\n";
|
||||
|
|
@ -464,13 +645,12 @@ def handle_table(m):
|
|||
# Handle line seperator:
|
||||
column_info = parse_colinfo(seperator_line);
|
||||
|
||||
# Process the body lines:
|
||||
for lines in body_lines:
|
||||
for body in body_rows.strip().split('\n\n'):
|
||||
state = State(section_tag = "tbody", \
|
||||
default_cell_tag = "td", \
|
||||
column_info = column_info);
|
||||
|
||||
for line in lines:
|
||||
for line in body.strip().split('\n'):
|
||||
inner += do_table_line(state, line);
|
||||
|
||||
if state.already_opened_section:
|
||||
|
|
@ -481,39 +661,47 @@ def handle_table(m):
|
|||
if optional_caption:
|
||||
inner = f"<caption> {optional_caption} </caption>\n" + inner;
|
||||
|
||||
close_tag = "</table>\n";
|
||||
table_open_tag = "<table>\n";
|
||||
table_close_tag = "</table>\n";
|
||||
|
||||
return "\n\n" + open_tag + inner + close_tag + "\n\n";
|
||||
return "\n\n" + table_open_tag + inner + table_close_tag + "\n";
|
||||
except SyntaxError as e:
|
||||
print(f"caught syntax error: {e}");
|
||||
print("moving on to next table...");
|
||||
return m.group(0);
|
||||
|
||||
def handle_table_no_sep(m):
|
||||
print("handle_table_no_sep");
|
||||
def handle_table_case_3(m):
|
||||
print("handle_table_case_3");
|
||||
|
||||
matched = m.group(0);
|
||||
|
||||
print(f'matched = """{matched}"""');
|
||||
|
||||
table_open_tag = m.group(1) + "\n";
|
||||
# required open table tag:
|
||||
table_open_tag = m.group(1);
|
||||
|
||||
one_or_more_body_lines = m.group(2);
|
||||
# remove the 'markdown="1"' syntax
|
||||
table_open_tag = re.sub(r"markdown=(?:\"1\"|'1'|1)", "", table_open_tag);
|
||||
|
||||
body_lines = [e.strip().split("\n") for e in one_or_more_body_lines.strip().split("\n\n")]
|
||||
# one or more body rows, with empty lines of one:
|
||||
body_rows = m.group(2);
|
||||
|
||||
# no line seperator
|
||||
|
||||
# optional caption:
|
||||
optional_caption = m.group(3);
|
||||
|
||||
# optional close table tag.
|
||||
|
||||
try:
|
||||
inner = "";
|
||||
|
||||
# Process the body lines:
|
||||
for lines in body_lines:
|
||||
for body in body_rows.strip().split('\n\n'):
|
||||
state = State(section_tag = "tbody", \
|
||||
default_cell_tag = "td", \
|
||||
column_info = []);
|
||||
|
||||
for line in lines:
|
||||
for line in body.strip().split('\n'):
|
||||
inner += do_table_line(state, line);
|
||||
|
||||
if state.already_opened_section:
|
||||
|
|
@ -524,10 +712,10 @@ def handle_table_no_sep(m):
|
|||
if optional_caption:
|
||||
inner = f"<caption> {optional_caption} </caption>\n" + inner;
|
||||
|
||||
table_open_tag = table_open_tag + "\n";
|
||||
table_close_tag = "</table>\n";
|
||||
|
||||
return "\n\n" + table_open_tag + inner + table_close_tag + "\n\n";
|
||||
|
||||
return "\n\n" + table_open_tag + inner + table_close_tag + "\n";
|
||||
except SyntaxError as e:
|
||||
print(f"caught syntax error: {e}");
|
||||
print("moving on to next table...");
|
||||
|
|
@ -537,7 +725,7 @@ with open("test.md") as stream:
|
|||
text = stream.read();
|
||||
|
||||
# delimiters between cells
|
||||
delimiter = r"(?:[|]|<(?:tr|th)(?:[\s]+[^<>]*)?>)";
|
||||
delimiter = r"(?:[|]|<(?:td|th)(?:[\s]+[^<>]*)?>)";
|
||||
|
||||
# A row is anything with at least one delimiter
|
||||
row = fr"(?: .* {delimiter} .*)";
|
||||
|
|
@ -560,35 +748,61 @@ row = fr"(?:{row} | {table_part}+)";
|
|||
# Between the header rows and the body rows there is a line seperator.
|
||||
seperator_line = r"\s* [|]? \s* [-=:]+ \s* (?: \s* [|] \s* [-=:]* \s* )* \s*"
|
||||
|
||||
# Regex for whole table:
|
||||
for o, c in product((1, 0), repeat=2):
|
||||
table = fr"""
|
||||
# two blank lines:
|
||||
[\n]{{2}}
|
||||
table = fr"""
|
||||
# two blank lines:
|
||||
[\n]{{2}}
|
||||
|
||||
# optional or required open table tag:
|
||||
(?:(<table[\s]+[^<>]*markdown=(?:"1"|'1'|1)[^<>]*>) \n){{{o},1}}
|
||||
# required open table tag:
|
||||
(?:(<table[\s]+[^<>]*markdown=(?:"1"|'1'|1)[^<>]*>) \n)
|
||||
|
||||
# zero or one or more header rows:
|
||||
((?: {row} \n){{{1-o},}})
|
||||
# zero or more header rows:
|
||||
((?: {row} \n)+)
|
||||
|
||||
# line seperator:
|
||||
({seperator_line}) [\n]
|
||||
# required line seperator:
|
||||
({seperator_line} [\n])
|
||||
|
||||
# zero or one or more body rows, with empty lines of one:
|
||||
((?: {row} [\n]{{1,2}}){{{1-c},}})
|
||||
# zero or more body rows, with empty lines of one:
|
||||
((?: {row} [\n]{{1,2}})*)
|
||||
|
||||
# optional caption:
|
||||
(?: \[ ([a-z0-9 "']+) \] \n)?
|
||||
# optional caption:
|
||||
(?: \[ ([a-z0-9 "']+) \] \n)?
|
||||
|
||||
# optional or required close table tag:
|
||||
(?: </table> [\n]){{{c},1}}
|
||||
# optional close table tag:
|
||||
(?: </table> [\n])?
|
||||
|
||||
# two blank lines:
|
||||
[\n]{{2}}
|
||||
""";
|
||||
# two blank lines (another newline already matched earlier)
|
||||
[\n]{{1}}
|
||||
""";
|
||||
|
||||
text = re.sub(table, handle_table, text, flags=re.VERBOSE)
|
||||
text = re.sub(table, handle_table_case_1, text, flags=re.VERBOSE)
|
||||
|
||||
table = fr"""
|
||||
# two blank lines:
|
||||
[\n]{{2}}
|
||||
|
||||
# no open table tag:
|
||||
# (?:(<table[\s]+[^<>]*markdown=(?:"1"|'1'|1)[^<>]*>) \n)?
|
||||
|
||||
# one or more header rows:
|
||||
((?: {row} \n)+)
|
||||
|
||||
# line seperator:
|
||||
({seperator_line} [\n])
|
||||
|
||||
# one or more body rows, with empty lines of one:
|
||||
((?: {row} [\n]{{1,2}})+)
|
||||
|
||||
# optional caption:
|
||||
(?: \[ ([a-z0-9 "']+) \] \n)?
|
||||
|
||||
# no close table tag:
|
||||
# (?: </table> [\n])?
|
||||
|
||||
# two blank lines (another newline already matched earlier)
|
||||
[\n]{{1}}
|
||||
""";
|
||||
|
||||
text = re.sub(table, handle_table_case_2, text, flags=re.VERBOSE)
|
||||
|
||||
table = fr"""
|
||||
# two blank lines:
|
||||
|
|
@ -600,17 +814,19 @@ table = fr"""
|
|||
# one or more body rows, with empty lines of one:
|
||||
((?: {row} [\n]{{1,2}})+)
|
||||
|
||||
# no line seperator
|
||||
|
||||
# optional caption:
|
||||
(?: \[ ([a-z0-9 "']+) \] \n)?
|
||||
|
||||
# required close table tag:
|
||||
(?: </table> [\n])
|
||||
# optional close table tag:
|
||||
(?: </table> [\n])?
|
||||
|
||||
# two blank lines:
|
||||
[\n]{{2}}
|
||||
# two blank lines (another newline already matched earlier)
|
||||
[\n]{{1}}
|
||||
""";
|
||||
|
||||
text = re.sub(table, handle_table_no_sep, text, flags=re.VERBOSE)
|
||||
text = re.sub(table, handle_table_case_3, text, flags=re.VERBOSE)
|
||||
|
||||
text += """
|
||||
<style>
|
||||
|
|
|
|||
Loading…
Reference in a new issue