From f487d02f1c5fb91361703bc01cdffbcba600a62a Mon Sep 17 00:00:00 2001 From: Zander Thannhauser Date: Sun, 14 Dec 2025 12:11:56 -0600 Subject: [PATCH] settling in --- test.md | 13 ++- test.py | 334 ++++++++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 286 insertions(+), 61 deletions(-) diff --git a/test.md b/test.md index 33df2ac..46a774d 100644 --- a/test.md +++ b/test.md @@ -6,6 +6,15 @@ Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed auctor, nunc non fr +line 9 | 2 +- | - +3 | 4 + + +xyz + + + @@ -76,7 +85,7 @@ tiptoe | 2 -
  1. 1
  2. 2
  3. 3
| foo +
  1. 1
  2. 2
  3. 3
| foo88 foo bar | baz | - | bar @@ -105,7 +114,7 @@ Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed auctor, nunc non fr -|a +|a117 |-
diff --git a/test.py b/test.py index bdb2a6a..310394e 100644 --- a/test.py +++ b/test.py @@ -424,39 +424,220 @@ def do_table_line(state, line): def handle_table(m): print("handle_table"); + assert(not "TODO"); + # matched = m.group(0); + + # optional_table_open = m.group(1); + + # one_or_more_header_lines = m.group(2); + + # header_lines = one_or_more_header_lines.strip().split("\n") + + # seperator_line = m.group(3); + + # one_or_more_body_lines = m.group(4); + + # body_lines = [e.strip().split("\n") for e in one_or_more_body_lines.strip().split("\n\n")] + + # optional_caption = m.group(5); + + # assert(seperator_line is not None) + + # try: + # # handle explicit table tag? + # if optional_table_open: + # open_tag = optional_table_open + "\n"; + # else: + # # otherwise, add a default one: + # open_tag = "" + "\n"; + + # inner = ""; + + # state = State(section_tag = "thead", default_cell_tag = "th"); + + # # Process the header lines: + # for line in header_lines: + # inner = do_table_line(state, line); + + # if state.already_opened_section: + # inner += f"" "\n"; + + # # Handle line seperator: + # column_info = parse_colinfo(seperator_line); + + # # Process the body lines: + # for lines in body_lines: + # state = State(section_tag = "tbody", \ + # default_cell_tag = "td", \ + # column_info = column_info); + + # for line in lines: + # inner += do_table_line(state, line); + + # if state.already_opened_section: + # inner += f"" "\n"; + + # # Consider the optional caption. + # # If it happens, it goes before everything else + # if optional_caption: + # inner = f"\n" + inner; + + # close_tag = "
{optional_caption}
\n"; + + # return "\n\n" + open_tag + inner + close_tag + "\n\n"; + # except SyntaxError as e: + # print(f"caught syntax error: {e}"); + # print("moving on to next table..."); + # return m.group(0); + +def handle_table_no_sep(m): + print("handle_table_no_sep"); + + assert(not "TODO"); + + # matched = m.group(0); + + # print(f'matched = """{matched}"""'); + + # table_open_tag = m.group(1) + "\n"; + + # one_or_more_body_lines = m.group(2); + + # body_lines = [e.strip().split("\n") for e in one_or_more_body_lines.strip().split("\n\n")] + + # optional_caption = m.group(3); + + # try: + # inner = ""; + + # # Process the body lines: + # for lines in body_lines: + # state = State(section_tag = "tbody", \ + # default_cell_tag = "td", \ + # column_info = []); + + # for line in lines: + # inner += do_table_line(state, line); + + # if state.already_opened_section: + # inner += f"" "\n"; + + # # Consider the optional caption. + # # If it happens, it goes before everything else + # if optional_caption: + # inner = f" {optional_caption} \n" + inner; + + # table_close_tag = "\n"; + + # return "\n\n" + table_open_tag + inner + table_close_tag + "\n\n"; + + # except SyntaxError as e: + # print(f"caught syntax error: {e}"); + # print("moving on to next table..."); + # return m.group(0); + +def handle_table_case_1(m): + print("handle_table_case_1"); + matched = m.group(0); - optional_table_open = m.group(1); + print(f'matched = """{matched}"""'); - one_or_more_header_lines = m.group(2); + # required open table tag: + table_open_tag = m.group(1); - header_lines = one_or_more_header_lines.strip().split("\n") + # remove the 'markdown="1"' syntax + table_open_tag = re.sub(r"markdown=(?:\"1\"|'1'|1)", "", table_open_tag); + # zero or more header rows: + header_rows = m.group(2); + + # required seperator line: seperator_line = m.group(3); - one_or_more_body_lines = m.group(4); - - body_lines = [e.strip().split("\n") for e in one_or_more_body_lines.strip().split("\n\n")] + # zero or more body rows, with empty lines of one: + body_rows = m.group(4); + # optional caption: optional_caption = m.group(5); - assert(seperator_line is not None) + try: + inner = ""; + + # Process the (optional) header lines: + if header_rows is not None: + state = State(section_tag = "thead", default_cell_tag = "th"); + + for line in header_rows.strip().split('\n'): + inner += do_table_line(state, line); + + if state.already_opened_section: + inner += f"" "\n"; + + # Handle line seperator: + column_info = parse_colinfo(seperator_line); + + if body_rows is not None and body_rows.strip(): + for body in body_rows.strip().split('\n\n'): + state = State(section_tag = "tbody", \ + default_cell_tag = "td", \ + column_info = column_info); + + for line in body.strip().split('\n'): + inner += do_table_line(state, line); + + if state.already_opened_section: + inner += f"" "\n"; + + # Consider the optional caption. + # If it happens, it goes before everything else + if optional_caption: + inner = f" {optional_caption} \n" + inner; + + table_open_tag = table_open_tag + "\n"; + table_close_tag = "\n"; + + return "\n\n" + table_open_tag + inner + table_close_tag + "\n"; + except SyntaxError as e: + print(f"caught syntax error: {e}"); + print("moving on to next table..."); + return m.group(0); + +def handle_table_case_2(m): + print("handle_table_case_2"); + + matched = m.group(0); + + print(f'matched = """{matched}"""'); + + # no open table tag: + + # one or more header rows: + header_rows = m.group(1); + + # line seperator: + seperator_line = m.group(2); + + print(f'seperator_line = "{seperator_line.strip()}"'); + + # one or more body rows, with empty lines of one: + body_rows = m.group(3); + + print(f'body_rows = "{body_rows}"'); + + # optional caption: + optional_caption = m.group(4); + + # no close table tag: try: - # handle explicit table tag? - if optional_table_open: - open_tag = optional_table_open + "\n"; - else: - # otherwise, add a default one: - open_tag = "" + "\n"; - inner = ""; state = State(section_tag = "thead", default_cell_tag = "th"); - # Process the header lines: - for line in header_lines: - inner = do_table_line(state, line); + # Process the required header lines: + for line in header_rows.strip().split('\n'): + inner += do_table_line(state, line); if state.already_opened_section: inner += f"" "\n"; @@ -464,13 +645,12 @@ def handle_table(m): # Handle line seperator: column_info = parse_colinfo(seperator_line); - # Process the body lines: - for lines in body_lines: + for body in body_rows.strip().split('\n\n'): state = State(section_tag = "tbody", \ default_cell_tag = "td", \ column_info = column_info); - for line in lines: + for line in body.strip().split('\n'): inner += do_table_line(state, line); if state.already_opened_section: @@ -481,39 +661,47 @@ def handle_table(m): if optional_caption: inner = f"\n" + inner; - close_tag = "
{optional_caption}
\n"; + table_open_tag = "\n"; + table_close_tag = "
\n"; - return "\n\n" + open_tag + inner + close_tag + "\n\n"; + return "\n\n" + table_open_tag + inner + table_close_tag + "\n"; except SyntaxError as e: print(f"caught syntax error: {e}"); print("moving on to next table..."); return m.group(0); -def handle_table_no_sep(m): - print("handle_table_no_sep"); +def handle_table_case_3(m): + print("handle_table_case_3"); matched = m.group(0); print(f'matched = """{matched}"""'); - table_open_tag = m.group(1) + "\n"; + # required open table tag: + table_open_tag = m.group(1); - one_or_more_body_lines = m.group(2); + # remove the 'markdown="1"' syntax + table_open_tag = re.sub(r"markdown=(?:\"1\"|'1'|1)", "", table_open_tag); - body_lines = [e.strip().split("\n") for e in one_or_more_body_lines.strip().split("\n\n")] + # one or more body rows, with empty lines of one: + body_rows = m.group(2); + # no line seperator + + # optional caption: optional_caption = m.group(3); + # optional close table tag. + try: inner = ""; - # Process the body lines: - for lines in body_lines: + for body in body_rows.strip().split('\n\n'): state = State(section_tag = "tbody", \ default_cell_tag = "td", \ column_info = []); - for line in lines: + for line in body.strip().split('\n'): inner += do_table_line(state, line); if state.already_opened_section: @@ -524,10 +712,10 @@ def handle_table_no_sep(m): if optional_caption: inner = f" {optional_caption} \n" + inner; + table_open_tag = table_open_tag + "\n"; table_close_tag = "\n"; - return "\n\n" + table_open_tag + inner + table_close_tag + "\n\n"; - + return "\n\n" + table_open_tag + inner + table_close_tag + "\n"; except SyntaxError as e: print(f"caught syntax error: {e}"); print("moving on to next table..."); @@ -537,7 +725,7 @@ with open("test.md") as stream: text = stream.read(); # delimiters between cells -delimiter = r"(?:[|]|<(?:tr|th)(?:[\s]+[^<>]*)?>)"; +delimiter = r"(?:[|]|<(?:td|th)(?:[\s]+[^<>]*)?>)"; # A row is anything with at least one delimiter row = fr"(?: .* {delimiter} .*)"; @@ -560,35 +748,61 @@ row = fr"(?:{row} | {table_part}+)"; # Between the header rows and the body rows there is a line seperator. seperator_line = r"\s* [|]? \s* [-=:]+ \s* (?: \s* [|] \s* [-=:]* \s* )* \s*" -# Regex for whole table: -for o, c in product((1, 0), repeat=2): - table = fr""" - # two blank lines: - [\n]{{2}} +table = fr""" + # two blank lines: + [\n]{{2}} - # optional or required open table tag: - (?:(]*markdown=(?:"1"|'1'|1)[^<>]*>) \n){{{o},1}} + # required open table tag: + (?:(]*markdown=(?:"1"|'1'|1)[^<>]*>) \n) - # zero or one or more header rows: - ((?: {row} \n){{{1-o},}}) + # zero or more header rows: + ((?: {row} \n)+) - # line seperator: - ({seperator_line}) [\n] + # required line seperator: + ({seperator_line} [\n]) - # zero or one or more body rows, with empty lines of one: - ((?: {row} [\n]{{1,2}}){{{1-c},}}) + # zero or more body rows, with empty lines of one: + ((?: {row} [\n]{{1,2}})*) - # optional caption: - (?: \[ ([a-z0-9 "']+) \] \n)? + # optional caption: + (?: \[ ([a-z0-9 "']+) \] \n)? - # optional or required close table tag: - (?: [\n]){{{c},1}} + # optional close table tag: + (?: [\n])? - # two blank lines: - [\n]{{2}} - """; + # two blank lines (another newline already matched earlier) + [\n]{{1}} +"""; - text = re.sub(table, handle_table, text, flags=re.VERBOSE) +text = re.sub(table, handle_table_case_1, text, flags=re.VERBOSE) + +table = fr""" + # two blank lines: + [\n]{{2}} + + # no open table tag: + # (?:(]*markdown=(?:"1"|'1'|1)[^<>]*>) \n)? + + # one or more header rows: + ((?: {row} \n)+) + + # line seperator: + ({seperator_line} [\n]) + + # one or more body rows, with empty lines of one: + ((?: {row} [\n]{{1,2}})+) + + # optional caption: + (?: \[ ([a-z0-9 "']+) \] \n)? + + # no close table tag: + # (?: [\n])? + + # two blank lines (another newline already matched earlier) + [\n]{{1}} +"""; + +text = re.sub(table, handle_table_case_2, text, flags=re.VERBOSE) table = fr""" # two blank lines: @@ -600,17 +814,19 @@ table = fr""" # one or more body rows, with empty lines of one: ((?: {row} [\n]{{1,2}})+) + # no line seperator + # optional caption: (?: \[ ([a-z0-9 "']+) \] \n)? - # required close table tag: - (?: [\n]) + # optional close table tag: + (?: [\n])? - # two blank lines: - [\n]{{2}} + # two blank lines (another newline already matched earlier) + [\n]{{1}} """; -text = re.sub(table, handle_table_no_sep, text, flags=re.VERBOSE) +text = re.sub(table, handle_table_case_3, text, flags=re.VERBOSE) text += """