From a60aca7c36647ec974609b91c308b81382c96542 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Moritz=20B=C3=B6hme?= <mail@moritzboeh.me>
Date: Mon, 26 Aug 2024 12:25:10 +0200
Subject: [PATCH] feat: add .filetags syntax for including other files

---
 filetags/__init__.py | 115 ++++++++++++++++++++++++++-----------------
 tests/unit_tests.py  |  89 +++++++++++++++++++++++++++++++--
 2 files changed, 154 insertions(+), 50 deletions(-)

diff --git a/filetags/__init__.py b/filetags/__init__.py
index a011465..ee23785 100755
--- a/filetags/__init__.py
+++ b/filetags/__init__.py
@@ -102,6 +102,9 @@ unique_tags = [UNIQUE_TAG_TESTSTRINGS]  # list of list which contains tags that
 DONOTSUGGEST_PREFIX = '#donotsuggest '
 do_not_suggest_tags = []  # list of lower-case strings
 
+INCLUDE_PREFIX = '#include '  # prefix of a .filetags line that includes another vocabulary file
+included_files = []  # list of real paths of the vocabulary files parsed so far
+
 DESCRIPTION = "This tool adds or removes simple tags to/from file names.\n\
 \n\
 Tags within file names are placed between the actual file name and\n\
@@ -1738,58 +1741,78 @@ def locate_and_parse_controlled_vocabulary(startfile):
 
     global unique_tags
     global do_not_suggest_tags
+    global included_files
 
     if filename:
-        logging.debug('locate_and_parse_controlled_vocabulary: .filetags found: ' + filename)
-        if os.path.isfile(filename):
-            logging.debug('locate_and_parse_controlled_vocabulary: found controlled vocabulary')
-
-            tags = []
-            with codecs.open(filename, encoding='utf-8') as filehandle:
-                logging.debug('locate_and_parse_controlled_vocabulary: reading controlled vocabulary in [%s]' %
-                              filename)
-                global controlled_vocabulary_filename
-                controlled_vocabulary_filename = filename
-                for rawline in filehandle:
-
-                    if rawline.strip().lower().startswith(DONOTSUGGEST_PREFIX):
-                        # parse and save do not suggest tags:
-                        line = rawline[len(DONOTSUGGEST_PREFIX):].strip().lower()
-                        for tag in line.split(BETWEEN_TAG_SEPARATOR):
-                            do_not_suggest_tags.append(tag)
-                    else:
-
-                        # remove everyting after the first hash character (which is a comment separator)
-                        line = rawline.strip().split('#')[0].strip()  # split and take everything before the first '#' as new "line"
-
-                        if len(line) == 0:
-                            # nothing left, line consisted only of a comment or was empty
-                            continue
-
-                        if BETWEEN_TAG_SEPARATOR in line:
-                            ## if multiple tags are in one line, they are mutually exclusive: only has can be set via filetags
-                            logging.debug('locate_and_parse_controlled_vocabulary: found unique tags: %s' %
-                                          (line))
-                            unique_tags.append(line.split(BETWEEN_TAG_SEPARATOR))
-                            for tag in line.split(BETWEEN_TAG_SEPARATOR):
-                                # *also* append unique tags to general tag list:
-                                tags.append(tag)
-                        else:
-                            tags.append(line)
-
-            logging.debug('locate_and_parse_controlled_vocabulary: controlled vocabulary has %i tags' %
-                          len(tags))
-            logging.debug('locate_and_parse_controlled_vocabulary: controlled vocabulary has %i groups of unique tags' %
-                          (len(unique_tags) - 1))
-
-            return tags
-        else:
-            logging.debug('locate_and_parse_controlled_vocabulary: controlled vocabulary is a non-existing file')
-            return []
+        return parse_controlled_vocabulary(filename)
     else:
         logging.debug('locate_and_parse_controlled_vocabulary: could not derive filename for controlled vocabulary')
         return []
 
+def parse_controlled_vocabulary(filename):
+    """Parse a controlled vocabulary file together with the files it includes.
+
+    A line starting with INCLUDE_PREFIX names another vocabulary file, relative
+    to the directory of `filename`. Included files are parsed after this file,
+    and only if their real path is not yet in the module-level list
+    `included_files`, which keeps circular and repeated includes from being
+    parsed again. That list is never cleared here.
+
+    Returns the list of tags found, or an empty list if `filename` is no file.
+    """
+    global controlled_vocabulary_filename
+    logging.debug('parse_controlled_vocabulary: .filetags found: ' + filename)
+    if not os.path.isfile(filename):
+        logging.debug('parse_controlled_vocabulary: controlled vocabulary is a non-existing file')
+        return []
+
+    logging.debug('parse_controlled_vocabulary: found controlled vocabulary')
+    included_files.append(os.path.realpath(filename))
+    controlled_vocabulary_filename = filename
+    files_to_include = []
+    tags = []
+    with codecs.open(filename, encoding='utf-8') as filehandle:
+        logging.debug('parse_controlled_vocabulary: reading controlled vocabulary in [%s]' % filename)
+        for rawline in filehandle:
+            stripped = rawline.strip()
+            if stripped.lower().startswith(INCLUDE_PREFIX):
+                # slice by length: the prefix match above is case-insensitive, so
+                # a case-sensitive removeprefix() would leave '#INCLUDE ' in the path
+                file_to_include = stripped[len(INCLUDE_PREFIX):].strip()
+                file_path = os.path.realpath(os.path.join(os.path.dirname(filename), file_to_include))
+                logging.debug('parse_controlled_vocabulary: found include statement for file [%s]' % file_path)
+                files_to_include.append(file_path)
+            elif stripped.lower().startswith(DONOTSUGGEST_PREFIX):
+                # parse and save do not suggest tags (sliced from the stripped line,
+                # so leading whitespace cannot shift the cut into the prefix):
+                line = stripped[len(DONOTSUGGEST_PREFIX):].strip().lower()
+                do_not_suggest_tags.extend(line.split(BETWEEN_TAG_SEPARATOR))
+            else:
+                # everything after the first hash character is a comment
+                line = stripped.split('#')[0].strip()
+                if not line:
+                    # nothing left, line consisted only of a comment or was empty
+                    continue
+                if BETWEEN_TAG_SEPARATOR in line:
+                    # multiple tags in one line are mutually exclusive: only one can be set via filetags
+                    logging.debug('parse_controlled_vocabulary: found unique tags: %s' % line)
+                    unique_tags.append(line.split(BETWEEN_TAG_SEPARATOR))
+                    # *also* append unique tags to general tag list:
+                    tags.extend(line.split(BETWEEN_TAG_SEPARATOR))
+                else:
+                    tags.append(line)
+
+    for file_path in files_to_include:
+        # check right before parsing: an earlier include may have pulled it in already
+        if file_path not in included_files:
+            logging.debug('parse_controlled_vocabulary: including file [%s]' % file_path)
+            tags.extend(parse_controlled_vocabulary(file_path))
+    # nested calls overwrite the global; restore it to the file parsed by this call
+    controlled_vocabulary_filename = filename
+    logging.debug('parse_controlled_vocabulary: controlled vocabulary has %i tags' % len(tags))
+    logging.debug('parse_controlled_vocabulary: controlled vocabulary has %i groups of unique tags' % (len(unique_tags) - 1))
+    return tags
+
 
 def print_tag_shortcut_with_numbers(tag_list, tags_get_added=True, tags_get_linked=False):
     """A list of tags from the list are printed to stdout. Each tag
diff --git a/tests/unit_tests.py b/tests/unit_tests.py
index d8000e5..3e71090 100755
--- a/tests/unit_tests.py
+++ b/tests/unit_tests.py
@@ -462,9 +462,8 @@ class TestLocateAndParseControlledVocabulary(unittest.TestCase):
         self.assertEqual(set(cv), set(["foo", "bar", "baz", "tag"]))
 
 
-    def test_include_lines_in_cv(self):
+    def test_include_lines_in_cv_not_circular(self):
         """
-        FIXXME!!!!
         This tests does not use the setup from the test class. However, it does use several
         other util functions defined in this class. Therefore, I set up a different test
         case here and re-use the util functions.
@@ -473,13 +472,95 @@ class TestLocateAndParseControlledVocabulary(unittest.TestCase):
         tmpdir
              `- subdir1
                       |
-                       `- .filetags with a reference to subdir2/included_filetags
+                       `- .filetags with a reference to subdir2/included.filetags
               - subdir2
                       |
                        `- included_filetags with additional tags
         """
-        pass  # FIXXME: implement
+        tempdir = tempfile.mkdtemp(prefix="TestControlledVocabulary_Include")
+        print("\ntempdir: " + tempdir + '  <<<' + '#' * 10)
 
+        subdir1 = os.path.join(tempdir, "subdir1")
+        os.makedirs(subdir1)
+        assert(os.path.exists(subdir1))
+
+        subdir2 = os.path.join(tempdir, "subdir2")
+        os.makedirs(subdir2)
+        assert(os.path.exists(subdir2))
+
+        include_cv = """
+        tag_from_include_before_CV
+        #include ../subdir2/included.filetags
+        tag_from_include_after_CV
+        """
+        include_file = os.path.join(subdir1, '.filetags')
+        self.create_file(include_file, include_cv)
+        assert(os.path.isfile(include_file))
+
+        included_cv = 'tag_from_included_CV'
+        included_file = os.path.join(subdir2, 'included.filetags')
+        self.create_file(included_file, included_cv)
+        assert(os.path.isfile(included_file))
+
+        if platform.system() != 'Windows':
+            os.sync()
+
+        # setup complete
+
+        cv = filetags.locate_and_parse_controlled_vocabulary(include_file)
+        self.assertEqual(set(cv), set(["tag_from_include_before_CV", "tag_from_include_after_CV", "tag_from_included_CV"]))
+
+    def test_include_lines_in_cv_circular(self):
+        """
+        This test does not use the setup from the test class. However, it does use several
+        other util functions defined in this class. Therefore, I set up a different test
+        case here and re-use the util functions.
+
+        Setup looks like this:
+        tmpdir
+             `- subdir1
+                      |
+                       `- .filetags with a reference to subdir2/included.filetags
+              - subdir2
+                      |
+                       `- included.filetags with additional tags and reference to subdir1/.filetags
+        """
+        tmp = tempfile.TemporaryDirectory(prefix="TestControlledVocabulary_Include")
+        self.addCleanup(tmp.cleanup)  # do not leave the directory tree behind after the test
+        tempdir = tmp.name
+        print("\ntempdir: " + tempdir + '  <<<' + '#' * 10)
+
+        subdir1 = os.path.join(tempdir, "subdir1")
+        os.makedirs(subdir1)
+        self.assertTrue(os.path.exists(subdir1))
+
+        subdir2 = os.path.join(tempdir, "subdir2")
+        os.makedirs(subdir2)
+        self.assertTrue(os.path.exists(subdir2))
+
+        circular1_cv = """
+        tag_from_first_before_CV
+        #include ../subdir2/included.filetags
+        tag_from_first_after_CV
+        """
+        circular1_file = os.path.join(subdir1, '.filetags')
+        self.create_file(circular1_file, circular1_cv)
+        self.assertTrue(os.path.isfile(circular1_file))
+
+        circular2_cv = """
+        tag_from_second_before_CV
+        #include ../subdir1/.filetags
+        tag_from_second_after_CV
+        """
+        circular2_file = os.path.join(subdir2, 'included.filetags')
+        self.create_file(circular2_file, circular2_cv)
+        self.assertTrue(os.path.isfile(circular2_file))
+
+        if platform.system() != 'Windows':
+            os.sync()
+        # setup complete: both files include each other, parsing must still terminate
+        cv = filetags.locate_and_parse_controlled_vocabulary(circular1_file)
+        self.assertEqual(set(cv), {"tag_from_first_before_CV", "tag_from_first_after_CV", "tag_from_second_before_CV", "tag_from_second_after_CV"})
 
 class TestFileWithoutTags(unittest.TestCase):