diff --git a/flake.nix b/flake.nix index 23aa496..6dc124e 100644 --- a/flake.nix +++ b/flake.nix @@ -28,6 +28,7 @@ gedit python3 meld + zip ]; }; }); diff --git a/test.md b/test.md index b3d3959..33df2ac 100644 --- a/test.md +++ b/test.md @@ -30,6 +30,52 @@ Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed auctor, nunc non fr + + +tiptoe | 2 + +3 | 4 +
+ + + + + + +tiptoe | 2 +3 | 4 +5 | 6 +[duh] +
+ + + + + + + +1 | 2 + +3 | 4 + +5 | 6 +
+ + + + + + +1 | 2 + +3 | 4 + +5 | 6 +[duh] +
+ + +
  1. 1
  2. 2
  3. 3
| foo foo bar | baz | - diff --git a/test.py b/test.py index ea32cb2..62550b0 100644 --- a/test.py +++ b/test.py @@ -67,6 +67,7 @@ def do_table_line(state, line): I'm supposed to return the whole HTML. Including the if needed. """ + print("do_table_line"); print(f'line = "{line}"'); @@ -74,7 +75,7 @@ def do_table_line(state, line): passthrough_pattern = fr"(?:<{tags}(?:[\s]+[^>]*)?>|)"; - start_tag_pattern = fr"<{state.section_tag}(?:[\s]+[^>]*)?>" + start_tag_pattern = fr"<(thead|tbody|tfoot)(?:[\s]+[^>]*)?>" open_tr_pattern = r"]*)?>[\s]*"; @@ -84,7 +85,7 @@ def do_table_line(state, line): caption_sentinel_pattern = "(?:" + \ '|'.join((cell_delimiter, passthrough_pattern, start_tag_pattern, - open_tr_pattern, open_caption_pattern)) + ")" + open_tr_pattern, open_caption_pattern, r'' + "\n"; out += m.group(0) + "\n"; @@ -103,6 +109,7 @@ def do_table_line(state, line): print(f'line = "{line}"'); + state.section_tag = m.group(1); state.already_opened_section = 1; continue; @@ -225,7 +232,6 @@ def do_table_line(state, line): line = line[1:]; print(f'line = "{line}"'); - # elif (m := re.match(r"<([a-z]+)(?:[\s]+([^<>]*))?>", line)): elif (m := re.match(r"<(th|td)(?:[\s]+([^<>]*))?>", line)): print("found opening HTML tag"); @@ -263,10 +269,10 @@ def do_table_line(state, line): line = line[len(m.group(0)):]; print(f'line = "{line}"'); - # elif (m := re.match(r"<([a-z]+)", line)): - # print("found HTML open, but it's incomplete? huh?! throwing!"); + elif (m := re.match(r"<(th|td)", line)): + print("found HTML open, but it's incomplete? huh?! throwing!"); - # raise SyntaxError("could not find '>' for HTML open tag"); + raise SyntaxError("could not find '>' for HTML open tag"); elif column_index < len(state.column_info): print("found nothing, defaulting to column info"); @@ -328,10 +334,6 @@ def do_table_line(state, line): line = line[len(m.group(0)):] print(f'line = "{line}"'); print(f'depth = {depth}'); - # elif (m := re.match(fr"", line)): - # # ignore the closers for table parts, no passthrough - # line = line[len(m.group(0)):] - # print(f'line = "{line}"'); elif (m := re.match("<[a-z]+(?:\\s+[^<>]*)?>", line)): content += m.group(0); line = line[len(m.group(0)):] @@ -373,83 +375,160 @@ def do_table_line(state, line): return out; -def do_table(table_open_tag, header_lines, seperator_line, body_lines, optional_caption): - - # handle explicit table tag? - if table_open_tag: - open_tag = table_open_tag + "\n"; - else: - # otherwise, add a default one: - open_tag = "" + "\n"; - - inner = ""; - - state = State(section_tag = "thead", default_cell_tag = "th"); - - # Process the header lines: - for line in header_lines: - html_table_line = do_table_line(state, line); - - inner += html_table_line; - - if state.already_opened_section: - inner += "" "\n"; - - # Handle line seperator: - column_info = parse_colinfo(seperator_line); - - # Process the body lines: - for lines in body_lines: - state = State(section_tag = "tbody", \ - default_cell_tag = "td", \ - column_info = column_info); - - for line in lines: - inner += do_table_line(state, line); - - if state.already_opened_section: - inner += "" "\n"; - - # Consider the optional caption. - # If it happens, it goes before everything else - if optional_caption: - inner = f"\n" + inner; - - close_tag = "
{optional_caption}
\n"; - - for o in inner.split("\n"): - print(o); - - return "\n\n" + open_tag + inner + close_tag + "\n\n"; +# def do_table(table_open_tag, header_lines, seperator_line, body_lines, optional_caption): +# +# # handle explicit table tag? +# if table_open_tag: +# open_tag = table_open_tag + "\n"; +# else: +# # otherwise, add a default one: +# open_tag = "" + "\n"; +# +# inner = ""; +# +# state = State(section_tag = "thead", default_cell_tag = "th"); +# +# # Process the header lines: +# for line in header_lines: +# inner = do_table_line(state, line); +# +# if state.already_opened_section: +# inner += f"" "\n"; +# +# # Handle line seperator: +# column_info = parse_colinfo(seperator_line); +# +# # Process the body lines: +# for lines in body_lines: +# state = State(section_tag = "tbody", \ +# default_cell_tag = "td", \ +# column_info = column_info); +# +# for line in lines: +# inner += do_table_line(state, line); +# +# if state.already_opened_section: +# inner += f"" "\n"; +# +# # Consider the optional caption. +# # If it happens, it goes before everything else +# if optional_caption: +# inner = f"\n" + inner; +# +# close_tag = "
{optional_caption}
\n"; +# +# for o in inner.split("\n"): +# print(o); +# +# return "\n\n" + open_tag + inner + close_tag + "\n\n"; def handle_table(m): print("handle_table"); matched = m.group(0); - print(f'matched = """{matched}"""'); - optional_table_open = m.group(1); one_or_more_header_lines = m.group(2); + header_lines = one_or_more_header_lines.strip().split("\n") + seperator_line = m.group(3); one_or_more_body_lines = m.group(4); + body_lines = [e.strip().split("\n") for e in one_or_more_body_lines.strip().split("\n\n")] + optional_caption = m.group(5); assert(seperator_line is not None) try: - return do_table( - optional_table_open, - one_or_more_header_lines.strip().split("\n"), - seperator_line, - [e.strip().split("\n") - for e in one_or_more_body_lines.strip().split("\n\n")], - optional_caption, - ); + # handle explicit table tag? + if optional_table_open: + open_tag = optional_table_open + "\n"; + else: + # otherwise, add a default one: + open_tag = "" + "\n"; + + inner = ""; + + state = State(section_tag = "thead", default_cell_tag = "th"); + + # Process the header lines: + for line in header_lines: + inner = do_table_line(state, line); + + if state.already_opened_section: + inner += f"" "\n"; + + # Handle line seperator: + column_info = parse_colinfo(seperator_line); + + # Process the body lines: + for lines in body_lines: + state = State(section_tag = "tbody", \ + default_cell_tag = "td", \ + column_info = column_info); + + for line in lines: + inner += do_table_line(state, line); + + if state.already_opened_section: + inner += f"" "\n"; + + # Consider the optional caption. + # If it happens, it goes before everything else + if optional_caption: + inner = f"\n" + inner; + + close_tag = "
{optional_caption}
\n"; + + return "\n\n" + open_tag + inner + close_tag + "\n\n"; + except SyntaxError as e: + print(f"caught syntax error: {e}"); + print("moving on to next table..."); + return m.group(0); + +def handle_table_no_sep(m): + print("handle_table_no_sep"); + + matched = m.group(0); + + print(f'matched = """{matched}"""'); + + table_open_tag = m.group(1) + "\n"; + + one_or_more_body_lines = m.group(2); + + body_lines = [e.strip().split("\n") for e in one_or_more_body_lines.strip().split("\n\n")] + + optional_caption = m.group(3); + + try: + inner = ""; + + # Process the body lines: + for lines in body_lines: + state = State(section_tag = "tbody", \ + default_cell_tag = "td", \ + column_info = []); + + for line in lines: + inner += do_table_line(state, line); + + if state.already_opened_section: + inner += f"" "\n"; + + # Consider the optional caption. + # If it happens, it goes before everything else + if optional_caption: + inner = f" {optional_caption} \n" + inner; + + table_close_tag = "\n"; + + return "\n\n" + table_open_tag + inner + table_close_tag + "\n\n"; + except SyntaxError as e: print(f"caught syntax error: {e}"); print("moving on to next table..."); @@ -489,7 +568,7 @@ for o, c in product((1, 0), repeat=2): [\n]{{2}} # optional or required open table tag: - (?:(]*markdown="1"[^<>]*>) \n){{{o},1}} + (?:(]*markdown=(?:"1"|'1'|1)[^<>]*>) \n){{{o},1}} # zero or one or more header rows: ((?: {row} \n){{{1-o},}}) @@ -512,6 +591,28 @@ for o, c in product((1, 0), repeat=2): text = re.sub(table, handle_table, text, flags=re.VERBOSE) +table = fr""" + # two blank lines: + [\n]{{2}} + + # required open table tag: + (?:(]*markdown=(?:"1"|'1'|1)[^<>]*>) \n) + + # one or more body rows, with empty lines of one: + ((?: {row} [\n]{{1,2}})+) + + # optional caption: + (?: \[ ([a-z0-9 "']+) \] \n)? + + # required close table tag: + (?: [\n]) + + # two blank lines: + [\n]{{2}} +"""; + +text = re.sub(table, handle_table_no_sep, text, flags=re.VERBOSE) + text += """