Add --force-cv flag to limit tags to the controlled vocabulary

Closes #18
2026-02-16 14:04:14 +00:00 · 2026-02-10 14:31:01 +01:00 · 2026-02-10 14:31:01 +01:00 · 35c9edf4aa
commit 35c9edf4aa
parent 778a2de5a0
2 changed files with 164 additions and 30 deletions
--- a/filetags/__init__.py
+++ b/filetags/__init__.py
@@ -284,6 +284,10 @@ parser.add_argument("--tag-gardening",
                    help="This is for getting an overview on tags that might require to be renamed (typos, " +
                    "singular/plural, ...). See also http://www.webology.org/2008/v5n3/a58.html")

+parser.add_argument("--force-cv",
+                    dest="force_cv", action="store_true",
+                    help="Only allow tags that are part of the controlled vocabulary (.filetags)")
+
 parser.add_argument("-v", "--verbose",
                    dest="verbose", action="store_true",
                    help="Enable verbose mode")
@@ -398,13 +402,15 @@ class TagDialog:

        ## FIXXME: Layout can be improved by somebody who knows how to do this.
        ##         E.g.: gray labels left justified, values centered (as they are now)
-        
+
        self.root = root
        self.root.title("filetags")

        self.vocabulary = vocabulary
        num_of_vocabulary_entries = len(vocabulary)
        self.entered_tags = ""
+        self.cancelled = False
+        self.force_cv_enabled = options.force_cv and not options.remove and not options.tagfilter and not options.tagtrees
        low_contrast_fg_color = self.get_soft_foreground(root, 0.6)  ## better than hard-coded gray values that interfere with default color schema

        # Label for instructions
@@ -431,11 +437,17 @@ class TagDialog:
            plural = ''
        self.label = tk.Label(self.root, text=f"Please enter 🏷 for {str(number_of_files)} file{plural}", width=50)
        self.label.pack(pady=(20,0))
-        
+
        # Create an entry widget for input
        self.entry = ttk.Entry(self.root, width=40)
        self.entry.pack(pady=(0,30))

+        warning_text = ""
+        if options.force_cv and not self.vocabulary:
+            warning_text = "No controlled vocabulary (.filetags) found; --force-cv disabled."
+        self.error_label = tk.Label(self.root, fg="red", text=warning_text)
+        self.error_label.pack(pady=(0,10))
+
        # Set focus on the entry field
        self.entry.focus_set()  # Ensure the cursor is within the entry field on startup

@@ -605,15 +617,25 @@ class TagDialog:
        ## knowledge.

        self.entered_tags = self.entry.get().strip()
-        print(f"Entered Tags: {self.entered_tags}")
+        self.cancelled = False
+        tags_for_validation = extract_tags_from_argument(self.entered_tags)
+        invalid_tags = force_cv_validator(self.force_cv_enabled, tags_for_validation, self.vocabulary)
+        if invalid_tags:
+            error_msg = "Invalid tags: " + BETWEEN_TAG_SEPARATOR.join(invalid_tags)
+            similar_msg = build_similar_to_invalid_tags_message(invalid_tags, self.vocabulary)
+            if similar_msg:
+                error_msg += "\n" + similar_msg
+            self.error_label.config(text=error_msg)
+            return

        # Close the window after submission
        self.root.quit()

    def on_cancel(self):
        # Just close the dialog
+        self.cancelled = True
        self.root.destroy()        
-    
+

 def contains_tag(filename, tagname=False):
    """
@@ -1614,6 +1636,45 @@ def find_similar_tags(tag, tags):
    return close_but_not_exact_matches


+def get_invalid_tags_for_vocabulary(tags, vocabulary):
+    """Return a list of tags not contained in the controlled vocabulary."""
+
+    normalized_vocabulary = set()
+    for vocab_tag in vocabulary:
+        if vocab_tag.startswith('-'):
+            normalized_vocabulary.add(vocab_tag[1:])
+        else:
+            normalized_vocabulary.add(vocab_tag)
+
+    invalid_tags = []
+    for raw_tag in tags:
+        tag = raw_tag[1:] if raw_tag.startswith('-') else raw_tag
+        if raw_tag.startswith('-') and tag in normalized_vocabulary:
+            continue
+        if tag not in normalized_vocabulary:
+            invalid_tags.append(raw_tag)
+    return list(dict.fromkeys(invalid_tags))
+
+def force_cv_validator(force_cv_enabled, tags_for_validation, vocabulary):
+    if not force_cv_enabled:
+        return None
+    if not tags_for_validation:
+        return None
+    invalid_tags = get_invalid_tags_for_vocabulary(tags_for_validation, vocabulary)
+    if invalid_tags:
+        return invalid_tags
+    return None
+
+def build_similar_to_invalid_tags_message(invalid_tags, vocabulary):
+    suggestions = []
+    for tag in invalid_tags:
+        similar = find_similar_tags(tag, vocabulary)
+        if similar:
+            suggestions.append(tag + " -> " + BETWEEN_TAG_SEPARATOR.join(similar))
+    if not suggestions:
+        return None
+    return "Similar tags: " + "; ".join(suggestions)
+
 def print_tag_dict(tag_dict_reference, vocabulary=False, sort_index=0,
                   print_similar_vocabulary_tags=False, print_only_tags_with_similar_tags=False):
    """
@@ -2238,7 +2299,8 @@ def _get_tag_visual(tags_for_visual=None):

    return visual

-def ask_for_tags_text_version(vocabulary, upto9_tags_for_shortcuts, hint_str, tag_list, tags_for_visual=None):
+def ask_for_tags_text_version(vocabulary, upto9_tags_for_shortcuts, hint_str, tag_list,
+                              tags_for_visual=None, prompt_prefill=None, invalid_tags=None):
    """
    Takes a vocabulary and optional up to nine tags for shortcuts and interactively asks
    the user to enter tags. Aborts program if no tags were entered. Returns list of
@@ -2261,27 +2323,46 @@ def ask_for_tags_text_version(vocabulary, upto9_tags_for_shortcuts, hint_str, ta
        readline.parse_and_bind('tab: complete')

        completionhint = '; complete %s tags with TAB' % str(len(vocabulary))
+        if options.force_cv and not options.remove and not options.tagfilter and not options.tagtrees:
+            completionhint += '; tags must match your controlled vocabulary'

    logging.debug("len(files) [%s]" % str(len(options.files)))
    logging.debug("files: %s" % str(options.files))

-    print("                 ")
-    print("Please enter tags" + colorama.Style.DIM + ", separated by \"" +
-          BETWEEN_TAG_SEPARATOR + "\"; abort with Ctrl-C" +
-          completionhint + colorama.Style.RESET_ALL)
-    print("                     ")
-    print(_get_tag_visual(tags_for_visual))
-    print("                     ")
+    minimal_prompt = invalid_tags is not None
+    if not minimal_prompt:
+        print("                 ")

-    if len(upto9_tags_for_shortcuts) > 0:
-        print_tag_shortcut_with_numbers(hint_str, tag_list)
+    if invalid_tags:
+        print(colorama.Fore.RED + "Invalid tags:" + colorama.Style.RESET_ALL +
+              " " + BETWEEN_TAG_SEPARATOR.join(invalid_tags))
+        similar_msg = build_similar_to_invalid_tags_message(invalid_tags, vocabulary)
+        if similar_msg:
+            print(similar_msg)
+
+    if not minimal_prompt:
+        print("Please enter tags" + colorama.Style.DIM + ", separated by \"" +
+              BETWEEN_TAG_SEPARATOR + "\"; abort with Ctrl-C" +
+              completionhint + colorama.Style.RESET_ALL)
+        print("                     ")
+        print(_get_tag_visual(tags_for_visual))
+        print("                     ")
+
+        if len(upto9_tags_for_shortcuts) > 0:
+            print_tag_shortcut_with_numbers(hint_str, tag_list)

    logging.debug("interactive mode: asking for tags ...")
+    if prompt_prefill:
+        def _prefill():
+            readline.insert_text(prompt_prefill)
+        readline.set_startup_hook(_prefill)
    entered_tags = input(colorama.Style.DIM + 'Tags: ' + colorama.Style.RESET_ALL).strip()
+    readline.set_startup_hook()
    return extract_tags_from_argument(entered_tags)


-def ask_for_tags_gui_version(vocabulary, upto9_tags_for_shortcuts, hint_str, tag_list, tags_for_visual=None):
+def ask_for_tags_gui_version(vocabulary, upto9_tags_for_shortcuts, hint_str, tag_list,
+                             tags_for_visual=None):
    """
    Takes a vocabulary and optional up to nine tags for shortcuts and interactively asks
    the user to enter tags. Aborts program if no tags were entered. Returns list of
@@ -2315,10 +2396,10 @@ def ask_for_tags_gui_version(vocabulary, upto9_tags_for_shortcuts, hint_str, tag
    return extract_tags_from_argument(entered_tags)


-def ask_for_tags(vocabulary, upto9_tags_for_shortcuts, tags_for_visual=None, gui=None):
+def ask_for_tags(vocabulary, controlled_vocabulary, upto9_tags_for_shortcuts, tags_for_visual=None, gui=None):
    """
    Wrapper-function for the text-based version and the GUI version:
-    
+
    Takes a vocabulary and optional up to nine tags for shortcuts and interactively asks
    the user to enter tags. Aborts program if no tags were entered. Returns list of
    entered tags.
@@ -2331,16 +2412,39 @@ def ask_for_tags(vocabulary, upto9_tags_for_shortcuts, tags_for_visual=None, gui
    hint_str, tag_list = get_tag_shortcut_information(upto9_tags_for_shortcuts,
                                                          tags_get_added=(not options.remove and not options.tagfilter),
                                                          tags_get_linked=options.tagfilter)
-    if gui:
-        tags_from_userinput = ask_for_tags_gui_version(vocabulary, upto9_tags_for_shortcuts, hint_str, tag_list, tags_for_visual)
-    else:
-        tags_from_userinput = ask_for_tags_text_version(vocabulary, upto9_tags_for_shortcuts, hint_str, tag_list, tags_for_visual)
-    
-    if not tags_from_userinput:
-        logging.info("no tags given, exiting.")
-        sys.stdout.flush()
-        sys.exit(0)
-    else:
+
+    force_cv_enabled = options.force_cv and not options.remove and not options.tagfilter and not options.tagtrees
+    if force_cv_enabled and not controlled_vocabulary:
+        print(colorama.Fore.RED + "No controlled vocabulary (.filetags) found; --force-cv disabled." + colorama.Style.RESET_ALL)
+        force_cv_enabled = False
+
+    if force_cv_enabled:
+        hint_str = hint_str + " (tags must match your controlled vocabulary)"
+
+    previous_input = None
+    previous_error = None
+    while True:
+        if gui:
+            tags_from_userinput = ask_for_tags_gui_version(
+                vocabulary, upto9_tags_for_shortcuts, hint_str, tag_list, tags_for_visual)
+        else:
+            tags_from_userinput = ask_for_tags_text_version(
+                vocabulary, upto9_tags_for_shortcuts, hint_str, tag_list, tags_for_visual,
+                prompt_prefill=previous_input, invalid_tags=previous_error)
+            previous_input = BETWEEN_TAG_SEPARATOR.join(tags_from_userinput) if tags_from_userinput else previous_input
+
+        if not tags_from_userinput:
+            logging.info("no tags given, exiting.")
+            sys.stdout.flush()
+            sys.exit(0)
+
+        validation_error = force_cv_validator(force_cv_enabled, tags_from_userinput, controlled_vocabulary)
+        if validation_error:
+            if not gui:
+                previous_error = validation_error
+                continue
+            previous_error = None
+
        if len(upto9_tags_for_shortcuts) > 0:
            # check if user entered number shortcuts for tags to be removed:
            tags_from_userinput = check_for_possible_shortcuts_in_entered_tags(
@@ -2925,9 +3029,10 @@ def main():
                  ' and height: ' + str(TTY_HEIGHT) + '   (80/80 is the fall-back)')
    tags_from_userinput = []
    if files:
-        vocabulary = sorted(locate_and_parse_controlled_vocabulary(files[0]))
+        controlled_vocabulary = sorted(locate_and_parse_controlled_vocabulary(files[0]))
    else:
-        vocabulary = sorted(locate_and_parse_controlled_vocabulary(False))
+        controlled_vocabulary = sorted(locate_and_parse_controlled_vocabulary(False))
+    vocabulary = list(controlled_vocabulary)

    if len(options.files) < 1 and not (options.tagtrees or
                                       options.tagfilter or
@@ -3061,7 +3166,8 @@ def main():
            logging.debug('derived vocabulary with %i entries' % len(vocabulary))  # using default vocabulary which was generate above

        # ==================== Interactive asking user for tags ============================= ##
-        tags_from_userinput = ask_for_tags(vocabulary, upto9_tags_for_shortcuts, tags_for_visual, options.gui)
+        tags_from_userinput = ask_for_tags(vocabulary, controlled_vocabulary,
+                                           upto9_tags_for_shortcuts, tags_for_visual, options.gui)
        # ==================== Interactive asking user for tags ============================= ##
        print('')  # new line after input for separating input from output

@@ -3078,6 +3184,22 @@ def main():
            sys.exit(0)

    logging.debug("tags found: [%s]" % '], ['.join(tags_from_userinput))
+
+    if options.force_cv and not options.remove and not options.tagfilter and not options.tagtrees and options.tags:
+        if not controlled_vocabulary:
+            error_exit(21, "No controlled vocabulary (.filetags) found; --force-cv requires a vocabulary.")
+        invalid_tags = get_invalid_tags_for_vocabulary(tags_from_userinput, controlled_vocabulary)
+        if invalid_tags:
+            logging.error(
+                colorama.Fore.RED + "Not all tags match the controlled vocabulary " +
+                colorama.Fore.LIGHTBLACK_EX + "(\"" + str(controlled_vocabulary_filename) + "\")" +
+                colorama.Style.RESET_ALL + ": " + BETWEEN_TAG_SEPARATOR.join(invalid_tags)
+            )
+            similar_msg = build_similar_to_invalid_tags_message(invalid_tags, controlled_vocabulary)
+            if similar_msg:
+                logging.info(similar_msg)
+            sys.exit(22)
+
    if options.remove:
        logging.info("removing tags \"%s\" ..." % str(BETWEEN_TAG_SEPARATOR.join(tags_from_userinput)))
    elif options.tagfilter:
--- a/tests/unit_tests.py
+++ b/tests/unit_tests.py
@@ -207,6 +207,18 @@ class TestMethods(unittest.TestCase):
                                                                     'file4 -- common foo bar jodel.txt.lnk'])),
                            set(['common', 'foo']))

+    def test_get_invalid_tags_for_vocabulary(self):
+        vocabulary = ['foo', 'bar', 'baz']
+        self.assertEqual(filetags.get_invalid_tags_for_vocabulary(['foo', 'bar'], vocabulary), [])
+        self.assertEqual(filetags.get_invalid_tags_for_vocabulary(['foo', 'qux'], vocabulary), ['qux'])
+        self.assertEqual(filetags.get_invalid_tags_for_vocabulary(['-foo', '-qux'], vocabulary), ['-qux'])
+        self.assertEqual(filetags.get_invalid_tags_for_vocabulary(['aa', 'aa', 'bb', 'aa'], ['bb']), ['aa'])
+
+    def test_build_similar_to_invalid_tags_message(self):
+        self.assertIsNone(filetags.build_similar_to_invalid_tags_message(['xxx'], ['foo', 'bar']))
+        self.assertEqual(filetags.build_similar_to_invalid_tags_message(['Simpson'], ['Simson', 'simpson']),
+                         'Similar tags: Simpson -> Simson simpson')
+
    def test_extract_tags_from_path(self):
        self.assertEqual(set(filetags.extract_tags_from_path('/a/path/without/tags')), set([]))
        self.assertEqual(set(filetags.extract_tags_from_path('/path -- ptag1/with -- ptag1 ptag2/tags')),