def parse_controlled_vocabulary(filename, _is_include=False):
    """Parse a controlled vocabulary file, following its ``#include`` lines.

    Every line of the file is handled as one of:

    * ``#include <path>`` -- ``<path>`` is resolved relative to the directory
      of ``filename`` and parsed recursively after this file has been read.
      A file that has already been parsed during the current top-level call
      is skipped, so circular, duplicated and diamond-shaped includes are
      each read exactly once.
    * ``#donotsuggest <tags>`` -- the lower-cased tags are appended to the
      module-level ``do_not_suggest_tags`` list.
    * anything else -- text after the first ``#`` is dropped as a comment;
      a line containing ``BETWEEN_TAG_SEPARATOR`` is recorded as one group
      of mutually exclusive tags in ``unique_tags`` and its tags are *also*
      added to the returned list; any other non-empty line is a single tag.

    Prefix keywords are matched case-insensitively and regardless of
    leading/trailing whitespace on the line.

    Args:
        filename: path of the vocabulary file to parse.
        _is_include: internal flag, True only for the recursive calls made
            for included files. External callers should leave the default.

    Returns:
        List of tags found in ``filename`` and in all files it includes
        (directly or indirectly). Empty list if ``filename`` is not an
        existing file.

    Side effects:
        Mutates the module-level ``included_files``, ``unique_tags`` and
        ``do_not_suggest_tags`` lists and, for the top-level file only,
        sets ``controlled_vocabulary_filename``.
    """
    global controlled_vocabulary_filename

    if not _is_include:
        # A fresh top-level parse must not inherit the visited-file list of a
        # previous call; otherwise its includes would be silently skipped.
        del included_files[:]

    logging.debug('parse_controlled_vocabulary: .filetags found: ' + filename)
    if not os.path.isfile(filename):
        logging.debug('parse_controlled_vocabulary: controlled vocabulary is a non-existing file')
        return []

    logging.debug('parse_controlled_vocabulary: found controlled vocabulary')

    # Register this file *before* reading it so that an include pointing
    # back at it (directly or via other files) is recognized as visited.
    included_files.append(os.path.realpath(filename))

    if not _is_include:
        # Only the root vocabulary file is recorded; included files must not
        # overwrite it.
        controlled_vocabulary_filename = filename

    tags = []
    files_to_include = []
    current_file_dir = os.path.dirname(filename)

    with codecs.open(filename, encoding='utf-8') as filehandle:
        logging.debug('parse_controlled_vocabulary: reading controlled vocabulary in [%s]' %
                      filename)
        for rawline in filehandle:
            stripped = rawline.strip()
            lowered = stripped.lower()

            if lowered.startswith(INCLUDE_PREFIX):
                # Slice by length instead of str.removeprefix(): the keyword
                # was matched case-insensitively, so a case-sensitive prefix
                # removal would leave e.g. '#Include ' inside the path.
                file_to_include = stripped[len(INCLUDE_PREFIX):].strip()
                file_path = os.path.realpath(os.path.join(current_file_dir, file_to_include))
                logging.debug('parse_controlled_vocabulary: found include statement for file [%s]' % file_path)
                files_to_include.append(file_path)

            elif lowered.startswith(DONOTSUGGEST_PREFIX):
                # parse and save do not suggest tags; slice the *stripped*
                # line so that leading whitespace does not shift the cut:
                line = lowered[len(DONOTSUGGEST_PREFIX):].strip()
                do_not_suggest_tags.extend(line.split(BETWEEN_TAG_SEPARATOR))

            else:
                # remove everything after the first hash character (which is a comment separator)
                line = stripped.split('#')[0].strip()

                if not line:
                    # nothing left, line consisted only of a comment or was empty
                    continue

                if BETWEEN_TAG_SEPARATOR in line:
                    # if multiple tags are in one line, they are mutually exclusive:
                    # only one of them can be set via filetags
                    logging.debug('parse_controlled_vocabulary: found unique tags: %s' %
                                  (line))
                    group = line.split(BETWEEN_TAG_SEPARATOR)
                    unique_tags.append(group)
                    # *also* append unique tags to general tag list:
                    tags.extend(group)
                else:
                    tags.append(line)

    # Recurse only after the file has been read completely and closed. The
    # visited check is done here (not while reading) so that a file queued
    # twice, or already pulled in by an earlier include, is parsed only once.
    for file_path in files_to_include:
        if file_path in included_files:
            continue
        logging.debug('parse_controlled_vocabulary: including file [%s]' % file_path)
        tags.extend(parse_controlled_vocabulary(file_path, _is_include=True))

    logging.debug('parse_controlled_vocabulary: controlled vocabulary has %i tags' %
                  len(tags))
    logging.debug('parse_controlled_vocabulary: controlled vocabulary has %i groups of unique tags' %
                  (len(unique_tags) - 1))

    return tags
def test_include_lines_in_cv_not_circular(self):
    """
    An ``#include`` line pulls the tags of another vocabulary file into the result.

    This test does not use the setup from the test class. However, it does use several
    other util functions defined in this class. Therefore, I set up a different test
    case here and re-use the util functions.

    Setup looks like this:
    tmpdir
     `- subdir1
     |   `- .filetags with a reference to subdir2/included.filetags
     `- subdir2
         `- included.filetags with additional tags
    """
    tempdir = tempfile.mkdtemp(prefix="TestControlledVocabulary_Include")
    print("\ntempdir: " + tempdir + ' <<<' + '#' * 10)

    # unittest assertions instead of bare ``assert``: they are not stripped
    # under ``python -O`` and report a useful message on failure.
    subdir1 = os.path.join(tempdir, "subdir1")
    os.makedirs(subdir1)
    self.assertTrue(os.path.exists(subdir1))

    subdir2 = os.path.join(tempdir, "subdir2")
    os.makedirs(subdir2)
    self.assertTrue(os.path.exists(subdir2))

    # The include line sits between two ordinary tags so that tags before
    # and after it are both checked; lines are indented on purpose.
    include_cv = """
        tag_from_include_before_CV
        #include ../subdir2/included.filetags
        tag_from_include_after_CV
        """
    include_file = os.path.join(subdir1, '.filetags')
    self.create_file(include_file, include_cv)
    self.assertTrue(os.path.isfile(include_file))

    included_cv = 'tag_from_included_CV'
    included_file = os.path.join(subdir2, 'included.filetags')
    self.create_file(included_file, included_cv)
    self.assertTrue(os.path.isfile(included_file))

    if platform.system() != 'Windows':
        os.sync()

    # setup complete

    cv = filetags.locate_and_parse_controlled_vocabulary(include_file)
    self.assertEqual(set(cv), set(["tag_from_include_before_CV",
                                   "tag_from_include_after_CV",
                                   "tag_from_included_CV"]))
def test_include_lines_in_cv_circular(self):
    """
    Two vocabulary files that ``#include`` each other are each parsed once.

    This test does not use the setup from the test class. However, it does use several
    other util functions defined in this class. Therefore, I set up a different test
    case here and re-use the util functions.

    Setup looks like this:
    tmpdir
     `- subdir1
     |   `- .filetags with a reference to subdir2/included.filetags
     `- subdir2
         `- included.filetags with additional tags and reference to subdir1/.filetags
    """
    tempdir = tempfile.mkdtemp(prefix="TestControlledVocabulary_Include")
    print("\ntempdir: " + tempdir + ' <<<' + '#' * 10)

    # unittest assertions instead of bare ``assert``: they are not stripped
    # under ``python -O`` and report a useful message on failure.
    subdir1 = os.path.join(tempdir, "subdir1")
    os.makedirs(subdir1)
    self.assertTrue(os.path.exists(subdir1))

    subdir2 = os.path.join(tempdir, "subdir2")
    os.makedirs(subdir2)
    self.assertTrue(os.path.exists(subdir2))

    circular1_cv = """
        tag_from_first_before_CV
        #include ../subdir2/included.filetags
        tag_from_first_after_CV
        """
    circular1_file = os.path.join(subdir1, '.filetags')
    self.create_file(circular1_file, circular1_cv)
    self.assertTrue(os.path.isfile(circular1_file))

    # The second file points back at the first one, closing the cycle.
    circular2_cv = """
        tag_from_second_before_CV
        #include ../subdir1/.filetags
        tag_from_second_after_CV
        """
    circular2_file = os.path.join(subdir2, 'included.filetags')
    self.create_file(circular2_file, circular2_cv)
    self.assertTrue(os.path.isfile(circular2_file))

    if platform.system() != 'Windows':
        os.sync()

    # setup complete

    cv = filetags.locate_and_parse_controlled_vocabulary(circular1_file)
    self.assertEqual(set(cv), set(["tag_from_first_before_CV",
                                   "tag_from_first_after_CV",
                                   "tag_from_second_before_CV",
                                   "tag_from_second_after_CV"]))