diff --git a/flake.nix b/flake.nix
index 23aa496..6dc124e 100644
--- a/flake.nix
+++ b/flake.nix
@@ -28,6 +28,7 @@
gedit
python3
meld
+ zip
];
};
});
diff --git a/test.md b/test.md
index b3d3959..33df2ac 100644
--- a/test.md
+++ b/test.md
@@ -30,6 +30,52 @@ Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed auctor, nunc non fr
+
+
+
+
+
+
+
+
+tiptoe | 2
+3 | 4
+5 | 6
+[duh]
+
+
+
+
+
+
+
+
+1 | 2
+
+3 | 4
+
+5 | 6
+
+
+
+
+
+
+
+1 | 2
+
+3 | 4
+
+5 | 6
+[duh]
+
+
+
+
- 1
- 2
- 3
| foo
foo bar | baz
| -
diff --git a/test.py b/test.py
index ea32cb2..62550b0 100644
--- a/test.py
+++ b/test.py
@@ -67,6 +67,7 @@ def do_table_line(state, line):
I'm supposed to return the whole HTML.
Including the if needed.
"""
+ print("do_table_line");
print(f'line = "{line}"');
@@ -74,7 +75,7 @@ def do_table_line(state, line):
passthrough_pattern = fr"(?:<{tags}(?:[\s]+[^>]*)?>|{tags}>)";
- start_tag_pattern = fr"<{state.section_tag}(?:[\s]+[^>]*)?>"
+ start_tag_pattern = fr"<(thead|tbody|tfoot)(?:[\s]+[^>]*)?>"
open_tr_pattern = r"
]*)?>[\s]*";
@@ -84,7 +85,7 @@ def do_table_line(state, line):
caption_sentinel_pattern = "(?:" + \
'|'.join((cell_delimiter, passthrough_pattern, start_tag_pattern,
- open_tr_pattern, open_caption_pattern)) + ")"
+ open_tr_pattern, open_caption_pattern, r'?table.*')) + ")"
already_open_tr = 0;
@@ -94,8 +95,13 @@ def do_table_line(state, line):
# Is it whatever our start tag is?
if (m := re.match(start_tag_pattern, line)):
# we'll pass this through, and remember that we don't need to do
- # it ourselves
- print("found our start tag");
+ # it ourselves also possibly close previous section also change
+ # "section_tag" to be thead
+ print(f"found our start tag: '{m.group(0)}'");
+
+ # if we're already open, close whatever that was
+ if state.already_opened_section:
+ out += f'{state.section_tag}>' + "\n";
out += m.group(0) + "\n";
@@ -103,6 +109,7 @@ def do_table_line(state, line):
print(f'line = "{line}"');
+ state.section_tag = m.group(1);
state.already_opened_section = 1;
continue;
@@ -225,7 +232,6 @@ def do_table_line(state, line):
line = line[1:];
print(f'line = "{line}"');
- # elif (m := re.match(r"<([a-z]+)(?:[\s]+([^<>]*))?>", line)):
elif (m := re.match(r"<(th|td)(?:[\s]+([^<>]*))?>", line)):
print("found opening HTML tag");
@@ -263,10 +269,10 @@ def do_table_line(state, line):
line = line[len(m.group(0)):];
print(f'line = "{line}"');
- # elif (m := re.match(r"<([a-z]+)", line)):
- # print("found HTML open, but it's incomplete? huh?! throwing!");
+ elif (m := re.match(r"<(th|td)", line)):
+ print("found HTML open, but it's incomplete? huh?! throwing!");
- # raise SyntaxError("could not find '>' for HTML open tag");
+ raise SyntaxError("could not find '>' for HTML open tag");
elif column_index < len(state.column_info):
print("found nothing, defaulting to column info");
@@ -328,10 +334,6 @@ def do_table_line(state, line):
line = line[len(m.group(0)):]
print(f'line = "{line}"');
print(f'depth = {depth}');
- # elif (m := re.match(fr"{tags}>", line)):
- # # ignore the closers for table parts, no passthrough
- # line = line[len(m.group(0)):]
- # print(f'line = "{line}"');
elif (m := re.match("<[a-z]+(?:\\s+[^<>]*)?>", line)):
content += m.group(0);
line = line[len(m.group(0)):]
@@ -373,83 +375,160 @@ def do_table_line(state, line):
return out;
-def do_table(table_open_tag, header_lines, seperator_line, body_lines, optional_caption):
-
- # handle explicit table tag?
- if table_open_tag:
- open_tag = table_open_tag + "\n";
- else:
- # otherwise, add a default one:
- open_tag = "" + "\n";
-
- inner = "";
-
- state = State(section_tag = "thead", default_cell_tag = "th");
-
- # Process the header lines:
- for line in header_lines:
- html_table_line = do_table_line(state, line);
-
- inner += html_table_line;
-
- if state.already_opened_section:
- inner += "" "\n";
-
- # Handle line seperator:
- column_info = parse_colinfo(seperator_line);
-
- # Process the body lines:
- for lines in body_lines:
- state = State(section_tag = "tbody", \
- default_cell_tag = "td", \
- column_info = column_info);
-
- for line in lines:
- inner += do_table_line(state, line);
-
- if state.already_opened_section:
- inner += "" "\n";
-
- # Consider the optional caption.
- # If it happens, it goes before everything else
- if optional_caption:
- inner = f" {optional_caption} \n" + inner;
-
- close_tag = "
\n";
-
- for o in inner.split("\n"):
- print(o);
-
- return "\n\n" + open_tag + inner + close_tag + "\n\n";
+# def do_table(table_open_tag, header_lines, seperator_line, body_lines, optional_caption):
+#
+# # handle explicit table tag?
+# if table_open_tag:
+# open_tag = table_open_tag + "\n";
+# else:
+# # otherwise, add a default one:
+# open_tag = "" + "\n";
+#
+# inner = "";
+#
+# state = State(section_tag = "thead", default_cell_tag = "th");
+#
+# # Process the header lines:
+# for line in header_lines:
+# inner = do_table_line(state, line);
+#
+# if state.already_opened_section:
+# inner += f"{state.section_tag}>" "\n";
+#
+# # Handle line seperator:
+# column_info = parse_colinfo(seperator_line);
+#
+# # Process the body lines:
+# for lines in body_lines:
+# state = State(section_tag = "tbody", \
+# default_cell_tag = "td", \
+# column_info = column_info);
+#
+# for line in lines:
+# inner += do_table_line(state, line);
+#
+# if state.already_opened_section:
+# inner += f"{state.section_tag}>" "\n";
+#
+# # Consider the optional caption.
+# # If it happens, it goes before everything else
+# if optional_caption:
+# inner = f" {optional_caption} \n" + inner;
+#
+# close_tag = "
\n";
+#
+# for o in inner.split("\n"):
+# print(o);
+#
+# return "\n\n" + open_tag + inner + close_tag + "\n\n";
def handle_table(m):
print("handle_table");
matched = m.group(0);
- print(f'matched = """{matched}"""');
-
optional_table_open = m.group(1);
one_or_more_header_lines = m.group(2);
+ header_lines = one_or_more_header_lines.strip().split("\n")  # one entry per header row
+
seperator_line = m.group(3);
one_or_more_body_lines = m.group(4);
+ body_lines = [e.strip().split("\n") for e in one_or_more_body_lines.strip().split("\n\n")]  # blank-line-separated groups of rows
+
optional_caption = m.group(5);
assert(seperator_line is not None)
try:
- return do_table(
- optional_table_open,
- one_or_more_header_lines.strip().split("\n"),
- seperator_line,
- [e.strip().split("\n")
- for e in one_or_more_body_lines.strip().split("\n\n")],
- optional_caption,
- );
+ # handle explicit table tag?
+ if optional_table_open:
+ open_tag = optional_table_open + "\n";
+ else:
+ # otherwise, add a default one:
+ open_tag = "" + "\n";
+
+ inner = "";
+
+ state = State(section_tag = "thead", default_cell_tag = "th");
+
+ # Process the header lines:
+ for line in header_lines:
+ inner += do_table_line(state, line);  # accumulate: plain '=' kept only the last header row
+
+ if state.already_opened_section:
+ inner += f"{state.section_tag}>" "\n";
+
+ # Handle line seperator:
+ column_info = parse_colinfo(seperator_line);
+
+ # Process the body lines:
+ for lines in body_lines:
+ state = State(section_tag = "tbody", \
+ default_cell_tag = "td", \
+ column_info = column_info);
+
+ for line in lines:
+ inner += do_table_line(state, line);
+
+ if state.already_opened_section:
+ inner += f"{state.section_tag}>" "\n";
+
+ # Consider the optional caption.
+ # If it happens, it goes before everything else
+ if optional_caption:
+ inner = f" {optional_caption} \n" + inner;
+
+ close_tag = "
\n";
+
+ return "\n\n" + open_tag + inner + close_tag + "\n\n";
+ except SyntaxError as e:  # on a SyntaxError, fall back to returning the matched text unchanged
+ print(f"caught syntax error: {e}");
+ print("moving on to next table...");
+ return m.group(0);
+
+def handle_table_no_sep(m):
+ print("handle_table_no_sep");
+
+ matched = m.group(0);
+
+ print(f'matched = """{matched}"""');
+
+ table_open_tag = m.group(1) + "\n";
+
+ one_or_more_body_lines = m.group(2);
+
+ body_lines = [e.strip().split("\n") for e in one_or_more_body_lines.strip().split("\n\n")]
+
+ optional_caption = m.group(3);
+
+ try:
+ inner = "";
+
+ # Process the body lines:
+ for lines in body_lines:
+ state = State(section_tag = "tbody", \
+ default_cell_tag = "td", \
+ column_info = []);
+
+ for line in lines:
+ inner += do_table_line(state, line);
+
+ if state.already_opened_section:
+ inner += f"{state.section_tag}>" "\n";
+
+ # Consider the optional caption.
+ # If it happens, it goes before everything else
+ if optional_caption:
+ inner = f" {optional_caption} \n" + inner;
+
+ table_close_tag = "\n";
+
+ return "\n\n" + table_open_tag + inner + table_close_tag + "\n\n";
+
except SyntaxError as e:
print(f"caught syntax error: {e}");
print("moving on to next table...");
@@ -489,7 +568,7 @@ for o, c in product((1, 0), repeat=2):
[\n]{{2}}
# optional or required open table tag:
- (?:(]*markdown="1"[^<>]*>) \n){{{o},1}}
+ (?:(]*markdown=(?:"1"|'1'|1)[^<>]*>) \n){{{o},1}}
# zero or one or more header rows:
((?: {row} \n){{{1-o},}})
@@ -512,6 +591,28 @@ for o, c in product((1, 0), repeat=2):
text = re.sub(table, handle_table, text, flags=re.VERBOSE)
+table = fr"""
+ # two blank lines:
+ [\n]{{2}}
+
+ # required open table tag:
+ (?:(]*markdown=(?:"1"|'1'|1)[^<>]*>) \n)
+
+ # one or more body rows, with empty lines of one:
+ ((?: {row} [\n]{{1,2}})+)
+
+ # optional caption:
+ (?: \[ ([a-z0-9 "']+) \] \n)?
+
+ # required close table tag:
+ (?:
[\n])
+
+ # two blank lines:
+ [\n]{{2}}
+""";
+
+text = re.sub(table, handle_table_no_sep, text, flags=re.VERBOSE)
+
text += """