diff --git a/test.md b/test.md
index 33df2ac..46a774d 100644
--- a/test.md
+++ b/test.md
@@ -6,6 +6,15 @@ Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed auctor, nunc non fr
+line 9 | 2
+- | -
+3 | 4
+
+
+xyz
+
+
+
@@ -76,7 +85,7 @@ tiptoe | 2
-
- 1
- 2
- 3
| foo
+ - 1
- 2
- 3
| foo88
foo bar | baz
| -
| bar
@@ -105,7 +114,7 @@ Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed auctor, nunc non fr
diff --git a/test.py b/test.py
index bdb2a6a..310394e 100644
--- a/test.py
+++ b/test.py
@@ -424,39 +424,220 @@ def do_table_line(state, line):
def handle_table(m):
print("handle_table");
+ assert(not "TODO");
+ # matched = m.group(0);
+
+ # optional_table_open = m.group(1);
+
+ # one_or_more_header_lines = m.group(2);
+
+ # header_lines = one_or_more_header_lines.strip().split("\n")
+
+ # seperator_line = m.group(3);
+
+ # one_or_more_body_lines = m.group(4);
+
+ # body_lines = [e.strip().split("\n") for e in one_or_more_body_lines.strip().split("\n\n")]
+
+ # optional_caption = m.group(5);
+
+ # assert(seperator_line is not None)
+
+ # try:
+ # # handle explicit table tag?
+ # if optional_table_open:
+ # open_tag = optional_table_open + "\n";
+ # else:
+ # # otherwise, add a default one:
+ # open_tag = "" + "\n";
+
+ # inner = "";
+
+ # state = State(section_tag = "thead", default_cell_tag = "th");
+
+ # # Process the header lines:
+ # for line in header_lines:
+ # inner = do_table_line(state, line);
+
+ # if state.already_opened_section:
+ # inner += f"{state.section_tag}>" "\n";
+
+ # # Handle line seperator:
+ # column_info = parse_colinfo(seperator_line);
+
+ # # Process the body lines:
+ # for lines in body_lines:
+ # state = State(section_tag = "tbody", \
+ # default_cell_tag = "td", \
+ # column_info = column_info);
+
+ # for line in lines:
+ # inner += do_table_line(state, line);
+
+ # if state.already_opened_section:
+ # inner += f"{state.section_tag}>" "\n";
+
+ # # Consider the optional caption.
+ # # If it happens, it goes before everything else
+ # if optional_caption:
+ # inner = f" {optional_caption} \n" + inner;
+
+ # close_tag = "
\n";
+
+ # return "\n\n" + open_tag + inner + close_tag + "\n\n";
+ # except SyntaxError as e:
+ # print(f"caught syntax error: {e}");
+ # print("moving on to next table...");
+ # return m.group(0);
+
+def handle_table_no_sep(m):
+ print("handle_table_no_sep");
+
+ assert(not "TODO");
+
+ # matched = m.group(0);
+
+ # print(f'matched = """{matched}"""');
+
+ # table_open_tag = m.group(1) + "\n";
+
+ # one_or_more_body_lines = m.group(2);
+
+ # body_lines = [e.strip().split("\n") for e in one_or_more_body_lines.strip().split("\n\n")]
+
+ # optional_caption = m.group(3);
+
+ # try:
+ # inner = "";
+
+ # # Process the body lines:
+ # for lines in body_lines:
+ # state = State(section_tag = "tbody", \
+ # default_cell_tag = "td", \
+ # column_info = []);
+
+ # for line in lines:
+ # inner += do_table_line(state, line);
+
+ # if state.already_opened_section:
+ # inner += f"{state.section_tag}>" "\n";
+
+ # # Consider the optional caption.
+ # # If it happens, it goes before everything else
+ # if optional_caption:
+ # inner = f" {optional_caption} \n" + inner;
+
+ # table_close_tag = "\n";
+
+ # return "\n\n" + table_open_tag + inner + table_close_tag + "\n\n";
+
+ # except SyntaxError as e:
+ # print(f"caught syntax error: {e}");
+ # print("moving on to next table...");
+ # return m.group(0);
+
+def handle_table_case_1(m):
+ print("handle_table_case_1");
+
matched = m.group(0);
- optional_table_open = m.group(1);
+ print(f'matched = """{matched}"""');
- one_or_more_header_lines = m.group(2);
+ # required open table tag:
+ table_open_tag = m.group(1);
- header_lines = one_or_more_header_lines.strip().split("\n")
+ # remove the 'markdown="1"' syntax
+ table_open_tag = re.sub(r"markdown=(?:\"1\"|'1'|1)", "", table_open_tag);
+ # zero or more header rows:
+ header_rows = m.group(2);
+
+ # required seperator line:
seperator_line = m.group(3);
- one_or_more_body_lines = m.group(4);
-
- body_lines = [e.strip().split("\n") for e in one_or_more_body_lines.strip().split("\n\n")]
+ # zero or more body rows, with empty lines of one:
+ body_rows = m.group(4);
+ # optional caption:
optional_caption = m.group(5);
- assert(seperator_line is not None)
+ try:
+ inner = "";
+
+ # Process the (optional) header lines:
+ if header_rows is not None:
+ state = State(section_tag = "thead", default_cell_tag = "th");
+
+ for line in header_rows.strip().split('\n'):
+ inner += do_table_line(state, line);
+
+ if state.already_opened_section:
+ inner += f"{state.section_tag}>" "\n";
+
+ # Handle line seperator:
+ column_info = parse_colinfo(seperator_line);
+
+ if body_rows is not None and body_rows.strip():
+ for body in body_rows.strip().split('\n\n'):
+ state = State(section_tag = "tbody", \
+ default_cell_tag = "td", \
+ column_info = column_info);
+
+ for line in body.strip().split('\n'):
+ inner += do_table_line(state, line);
+
+ if state.already_opened_section:
+ inner += f"{state.section_tag}>" "\n";
+
+ # Consider the optional caption.
+ # If it happens, it goes before everything else
+ if optional_caption:
+ inner = f" {optional_caption} \n" + inner;
+
+ table_open_tag = table_open_tag + "\n";
+ table_close_tag = "\n";
+
+ return "\n\n" + table_open_tag + inner + table_close_tag + "\n";
+ except SyntaxError as e:
+ print(f"caught syntax error: {e}");
+ print("moving on to next table...");
+ return m.group(0);
+
+def handle_table_case_2(m):
+ print("handle_table_case_2");
+
+ matched = m.group(0);
+
+ print(f'matched = """{matched}"""');
+
+ # no open table tag:
+
+ # one or more header rows:
+ header_rows = m.group(1);
+
+ # line seperator:
+ seperator_line = m.group(2);
+
+ print(f'seperator_line = "{seperator_line.strip()}"');
+
+ # one or more body rows, with empty lines of one:
+ body_rows = m.group(3);
+
+ print(f'body_rows = "{body_rows}"');
+
+ # optional caption:
+ optional_caption = m.group(4);
+
+ # no close table tag:
try:
- # handle explicit table tag?
- if optional_table_open:
- open_tag = optional_table_open + "\n";
- else:
- # otherwise, add a default one:
- open_tag = "" + "\n";
-
inner = "";
state = State(section_tag = "thead", default_cell_tag = "th");
- # Process the header lines:
- for line in header_lines:
- inner = do_table_line(state, line);
+ # Process the required header lines:
+ for line in header_rows.strip().split('\n'):
+ inner += do_table_line(state, line);
if state.already_opened_section:
inner += f"{state.section_tag}>" "\n";
@@ -464,13 +645,12 @@ def handle_table(m):
# Handle line seperator:
column_info = parse_colinfo(seperator_line);
- # Process the body lines:
- for lines in body_lines:
+ for body in body_rows.strip().split('\n\n'):
state = State(section_tag = "tbody", \
default_cell_tag = "td", \
column_info = column_info);
- for line in lines:
+ for line in body.strip().split('\n'):
inner += do_table_line(state, line);
if state.already_opened_section:
@@ -481,39 +661,47 @@ def handle_table(m):
if optional_caption:
inner = f" {optional_caption} \n" + inner;
- close_tag = "
\n";
+ table_open_tag = "\n";
+ table_close_tag = "
\n";
- return "\n\n" + open_tag + inner + close_tag + "\n\n";
+ return "\n\n" + table_open_tag + inner + table_close_tag + "\n";
except SyntaxError as e:
print(f"caught syntax error: {e}");
print("moving on to next table...");
return m.group(0);
-def handle_table_no_sep(m):
- print("handle_table_no_sep");
+def handle_table_case_3(m):
+ print("handle_table_case_3");
matched = m.group(0);
print(f'matched = """{matched}"""');
- table_open_tag = m.group(1) + "\n";
+ # required open table tag:
+ table_open_tag = m.group(1);
- one_or_more_body_lines = m.group(2);
+ # remove the 'markdown="1"' syntax
+ table_open_tag = re.sub(r"markdown=(?:\"1\"|'1'|1)", "", table_open_tag);
- body_lines = [e.strip().split("\n") for e in one_or_more_body_lines.strip().split("\n\n")]
+ # one or more body rows, with empty lines of one:
+ body_rows = m.group(2);
+ # no line seperator
+
+ # optional caption:
optional_caption = m.group(3);
+ # optional close table tag.
+
try:
inner = "";
- # Process the body lines:
- for lines in body_lines:
+ for body in body_rows.strip().split('\n\n'):
state = State(section_tag = "tbody", \
default_cell_tag = "td", \
column_info = []);
- for line in lines:
+ for line in body.strip().split('\n'):
inner += do_table_line(state, line);
if state.already_opened_section:
@@ -524,10 +712,10 @@ def handle_table_no_sep(m):
if optional_caption:
inner = f" {optional_caption} \n" + inner;
+ table_open_tag = table_open_tag + "\n";
table_close_tag = "\n";
- return "\n\n" + table_open_tag + inner + table_close_tag + "\n\n";
-
+ return "\n\n" + table_open_tag + inner + table_close_tag + "\n";
except SyntaxError as e:
print(f"caught syntax error: {e}");
print("moving on to next table...");
@@ -537,7 +725,7 @@ with open("test.md") as stream:
text = stream.read();
# delimiters between cells
-delimiter = r"(?:[|]|<(?:tr|th)(?:[\s]+[^<>]*)?>)";
+delimiter = r"(?:[|]|<(?:td|th)(?:[\s]+[^<>]*)?>)";
# A row is anything with at least one delimiter
row = fr"(?: .* {delimiter} .*)";
@@ -560,35 +748,61 @@ row = fr"(?:{row} | {table_part}+)";
# Between the header rows and the body rows there is a line seperator.
seperator_line = r"\s* [|]? \s* [-=:]+ \s* (?: \s* [|] \s* [-=:]* \s* )* \s*"
-# Regex for whole table:
-for o, c in product((1, 0), repeat=2):
- table = fr"""
- # two blank lines:
- [\n]{{2}}
+table = fr"""
+ # two blank lines:
+ [\n]{{2}}
- # optional or required open table tag:
- (?:(]*markdown=(?:"1"|'1'|1)[^<>]*>) \n){{{o},1}}
+ # required open table tag:
+ (?:(]*markdown=(?:"1"|'1'|1)[^<>]*>) \n)
- # zero or one or more header rows:
- ((?: {row} \n){{{1-o},}})
+ # zero or more header rows:
+ ((?: {row} \n)+)
- # line seperator:
- ({seperator_line}) [\n]
+ # required line seperator:
+ ({seperator_line} [\n])
- # zero or one or more body rows, with empty lines of one:
- ((?: {row} [\n]{{1,2}}){{{1-c},}})
+ # zero or more body rows, with empty lines of one:
+ ((?: {row} [\n]{{1,2}})*)
- # optional caption:
- (?: \[ ([a-z0-9 "']+) \] \n)?
+ # optional caption:
+ (?: \[ ([a-z0-9 "']+) \] \n)?
- # optional or required close table tag:
- (?:
[\n]){{{c},1}}
+ # optional close table tag:
+ (?:
[\n])?
- # two blank lines:
- [\n]{{2}}
- """;
+ # two blank lines (another newline already matched earlier)
+ [\n]{{1}}
+""";
- text = re.sub(table, handle_table, text, flags=re.VERBOSE)
+text = re.sub(table, handle_table_case_1, text, flags=re.VERBOSE)
+
+table = fr"""
+ # two blank lines:
+ [\n]{{2}}
+
+ # no open table tag:
+ # (?:(]*markdown=(?:"1"|'1'|1)[^<>]*>) \n)?
+
+ # one or more header rows:
+ ((?: {row} \n)+)
+
+ # line seperator:
+ ({seperator_line} [\n])
+
+ # one or more body rows, with empty lines of one:
+ ((?: {row} [\n]{{1,2}})+)
+
+ # optional caption:
+ (?: \[ ([a-z0-9 "']+) \] \n)?
+
+ # no close table tag:
+ # (?:
[\n])?
+
+ # two blank lines (another newline already matched earlier)
+ [\n]{{1}}
+""";
+
+text = re.sub(table, handle_table_case_2, text, flags=re.VERBOSE)
table = fr"""
# two blank lines:
@@ -600,17 +814,19 @@ table = fr"""
# one or more body rows, with empty lines of one:
((?: {row} [\n]{{1,2}})+)
+ # no line seperator
+
# optional caption:
(?: \[ ([a-z0-9 "']+) \] \n)?
- # required close table tag:
- (?: [\n])
+ # optional close table tag:
+ (?: [\n])?
- # two blank lines:
- [\n]{{2}}
+ # two blank lines (another newline already matched earlier)
+ [\n]{{1}}
""";
-text = re.sub(table, handle_table_no_sep, text, flags=re.VERBOSE)
+text = re.sub(table, handle_table_case_3, text, flags=re.VERBOSE)
text += """