From 35c9edf4aa935cf85e560f398539bbe1cfc797e3 Mon Sep 17 00:00:00 2001 From: Jonathan Neidel Date: Tue, 10 Feb 2026 14:31:01 +0100 Subject: [PATCH 1/5] Add --force-cv flag to limit tags to the controlled vocabulary Closes #18 --- filetags/__init__.py | 182 ++++++++++++++++++++++++++++++++++++------- tests/unit_tests.py | 12 +++ 2 files changed, 164 insertions(+), 30 deletions(-) diff --git a/filetags/__init__.py b/filetags/__init__.py index f37bd52..89fa7af 100755 --- a/filetags/__init__.py +++ b/filetags/__init__.py @@ -284,6 +284,10 @@ parser.add_argument("--tag-gardening", help="This is for getting an overview on tags that might require to be renamed (typos, " + "singular/plural, ...). See also http://www.webology.org/2008/v5n3/a58.html") +parser.add_argument("--force-cv", + dest="force_cv", action="store_true", + help="Only allow tags that are part of the controlled vocabulary (.filetags)") + parser.add_argument("-v", "--verbose", dest="verbose", action="store_true", help="Enable verbose mode") @@ -398,13 +402,15 @@ class TagDialog: ## FIXXME: Layout can be improved by somebody who knows how to do this. 
## E.g.: gray labels left justified, values centered (as they are now) - + self.root = root self.root.title("filetags") self.vocabulary = vocabulary num_of_vocabulary_entries = len(vocabulary) self.entered_tags = "" + self.cancelled = False + self.force_cv_enabled = options.force_cv and not options.remove and not options.tagfilter and not options.tagtrees low_contrast_fg_color = self.get_soft_foreground(root, 0.6) ## better than hard-coded gray values that interfere with default color schema # Label for instructions @@ -431,11 +437,17 @@ class TagDialog: plural = '' self.label = tk.Label(self.root, text=f"Please enter 🏷 for {str(number_of_files)} file{plural}", width=50) self.label.pack(pady=(20,0)) - + # Create an entry widget for input self.entry = ttk.Entry(self.root, width=40) self.entry.pack(pady=(0,30)) + warning_text = "" + if options.force_cv and not self.vocabulary: + warning_text = "No controlled vocabulary (.filetags) found; --force-cv disabled." + self.error_label = tk.Label(self.root, fg="red", text=warning_text) + self.error_label.pack(pady=(0,10)) + # Set focus on the entry field self.entry.focus_set() # Ensure the cursor is within the entry field on startup @@ -605,15 +617,25 @@ class TagDialog: ## knowledge. 
self.entered_tags = self.entry.get().strip() - print(f"Entered Tags: {self.entered_tags}") + self.cancelled = False + tags_for_validation = extract_tags_from_argument(self.entered_tags) + invalid_tags = force_cv_validator(self.force_cv_enabled, tags_for_validation, self.vocabulary) + if invalid_tags: + error_msg = "Invalid tags: " + BETWEEN_TAG_SEPARATOR.join(invalid_tags) + similar_msg = build_similar_to_invalid_tags_message(invalid_tags, self.vocabulary) + if similar_msg: + error_msg += "\n" + similar_msg + self.error_label.config(text=error_msg) + return # Close the window after submission self.root.quit() def on_cancel(self): # Just close the dialog + self.cancelled = True self.root.destroy() - + def contains_tag(filename, tagname=False): """ @@ -1614,6 +1636,45 @@ def find_similar_tags(tag, tags): return close_but_not_exact_matches +def get_invalid_tags_for_vocabulary(tags, vocabulary): + """Return a list of tags not contained in the controlled vocabulary.""" + + normalized_vocabulary = set() + for vocab_tag in vocabulary: + if vocab_tag.startswith('-'): + normalized_vocabulary.add(vocab_tag[1:]) + else: + normalized_vocabulary.add(vocab_tag) + + invalid_tags = [] + for raw_tag in tags: + tag = raw_tag[1:] if raw_tag.startswith('-') else raw_tag + if raw_tag.startswith('-') and tag in normalized_vocabulary: + continue + if tag not in normalized_vocabulary: + invalid_tags.append(raw_tag) + return list(dict.fromkeys(invalid_tags)) + +def force_cv_validator(force_cv_enabled, tags_for_validation, vocabulary): + if not force_cv_enabled: + return None + if not tags_for_validation: + return None + invalid_tags = get_invalid_tags_for_vocabulary(tags_for_validation, vocabulary) + if invalid_tags: + return invalid_tags + return None + +def build_similar_to_invalid_tags_message(invalid_tags, vocabulary): + suggestions = [] + for tag in invalid_tags: + similar = find_similar_tags(tag, vocabulary) + if similar: + suggestions.append(tag + " -> " + 
BETWEEN_TAG_SEPARATOR.join(similar)) + if not suggestions: + return None + return "Similar tags: " + "; ".join(suggestions) + def print_tag_dict(tag_dict_reference, vocabulary=False, sort_index=0, print_similar_vocabulary_tags=False, print_only_tags_with_similar_tags=False): """ @@ -2238,7 +2299,8 @@ def _get_tag_visual(tags_for_visual=None): return visual -def ask_for_tags_text_version(vocabulary, upto9_tags_for_shortcuts, hint_str, tag_list, tags_for_visual=None): +def ask_for_tags_text_version(vocabulary, upto9_tags_for_shortcuts, hint_str, tag_list, + tags_for_visual=None, prompt_prefill=None, invalid_tags=None): """ Takes a vocabulary and optional up to nine tags for shortcuts and interactively asks the user to enter tags. Aborts program if no tags were entered. Returns list of @@ -2261,27 +2323,46 @@ def ask_for_tags_text_version(vocabulary, upto9_tags_for_shortcuts, hint_str, ta readline.parse_and_bind('tab: complete') completionhint = '; complete %s tags with TAB' % str(len(vocabulary)) + if options.force_cv and not options.remove and not options.tagfilter and not options.tagtrees: + completionhint += '; tags must match your controlled vocabulary' logging.debug("len(files) [%s]" % str(len(options.files))) logging.debug("files: %s" % str(options.files)) - print(" ") - print("Please enter tags" + colorama.Style.DIM + ", separated by \"" + - BETWEEN_TAG_SEPARATOR + "\"; abort with Ctrl-C" + - completionhint + colorama.Style.RESET_ALL) - print(" ") - print(_get_tag_visual(tags_for_visual)) - print(" ") + minimal_prompt = invalid_tags is not None + if not minimal_prompt: + print(" ") - if len(upto9_tags_for_shortcuts) > 0: - print_tag_shortcut_with_numbers(hint_str, tag_list) + if invalid_tags: + print(colorama.Fore.RED + "Invalid tags:" + colorama.Style.RESET_ALL + + " " + BETWEEN_TAG_SEPARATOR.join(invalid_tags)) + similar_msg = build_similar_to_invalid_tags_message(invalid_tags, vocabulary) + if similar_msg: + print(similar_msg) + + if not minimal_prompt: + 
print("Please enter tags" + colorama.Style.DIM + ", separated by \"" + + BETWEEN_TAG_SEPARATOR + "\"; abort with Ctrl-C" + + completionhint + colorama.Style.RESET_ALL) + print(" ") + print(_get_tag_visual(tags_for_visual)) + print(" ") + + if len(upto9_tags_for_shortcuts) > 0: + print_tag_shortcut_with_numbers(hint_str, tag_list) logging.debug("interactive mode: asking for tags ...") + if prompt_prefill: + def _prefill(): + readline.insert_text(prompt_prefill) + readline.set_startup_hook(_prefill) entered_tags = input(colorama.Style.DIM + 'Tags: ' + colorama.Style.RESET_ALL).strip() + readline.set_startup_hook() return extract_tags_from_argument(entered_tags) -def ask_for_tags_gui_version(vocabulary, upto9_tags_for_shortcuts, hint_str, tag_list, tags_for_visual=None): +def ask_for_tags_gui_version(vocabulary, upto9_tags_for_shortcuts, hint_str, tag_list, + tags_for_visual=None): """ Takes a vocabulary and optional up to nine tags for shortcuts and interactively asks the user to enter tags. Aborts program if no tags were entered. Returns list of @@ -2315,10 +2396,10 @@ def ask_for_tags_gui_version(vocabulary, upto9_tags_for_shortcuts, hint_str, tag return extract_tags_from_argument(entered_tags) -def ask_for_tags(vocabulary, upto9_tags_for_shortcuts, tags_for_visual=None, gui=None): +def ask_for_tags(vocabulary, controlled_vocabulary, upto9_tags_for_shortcuts, tags_for_visual=None, gui=None): """ Wrapper-function for the text-based version and the GUI version: - + Takes a vocabulary and optional up to nine tags for shortcuts and interactively asks the user to enter tags. Aborts program if no tags were entered. Returns list of entered tags. 
@@ -2331,16 +2412,39 @@ def ask_for_tags(vocabulary, upto9_tags_for_shortcuts, tags_for_visual=None, gui hint_str, tag_list = get_tag_shortcut_information(upto9_tags_for_shortcuts, tags_get_added=(not options.remove and not options.tagfilter), tags_get_linked=options.tagfilter) - if gui: - tags_from_userinput = ask_for_tags_gui_version(vocabulary, upto9_tags_for_shortcuts, hint_str, tag_list, tags_for_visual) - else: - tags_from_userinput = ask_for_tags_text_version(vocabulary, upto9_tags_for_shortcuts, hint_str, tag_list, tags_for_visual) - - if not tags_from_userinput: - logging.info("no tags given, exiting.") - sys.stdout.flush() - sys.exit(0) - else: + + force_cv_enabled = options.force_cv and not options.remove and not options.tagfilter and not options.tagtrees + if force_cv_enabled and not controlled_vocabulary: + print(colorama.Fore.RED + "No controlled vocabulary (.filetags) found; --force-cv disabled." + colorama.Style.RESET_ALL) + force_cv_enabled = False + + if force_cv_enabled: + hint_str = hint_str + " (tags must match your controlled vocabulary)" + + previous_input = None + previous_error = None + while True: + if gui: + tags_from_userinput = ask_for_tags_gui_version( + vocabulary, upto9_tags_for_shortcuts, hint_str, tag_list, tags_for_visual) + else: + tags_from_userinput = ask_for_tags_text_version( + vocabulary, upto9_tags_for_shortcuts, hint_str, tag_list, tags_for_visual, + prompt_prefill=previous_input, invalid_tags=previous_error) + previous_input = BETWEEN_TAG_SEPARATOR.join(tags_from_userinput) if tags_from_userinput else previous_input + + if not tags_from_userinput: + logging.info("no tags given, exiting.") + sys.stdout.flush() + sys.exit(0) + + validation_error = force_cv_validator(force_cv_enabled, tags_from_userinput, controlled_vocabulary) + if validation_error: + if not gui: + previous_error = validation_error + continue + previous_error = None + if len(upto9_tags_for_shortcuts) > 0: # check if user entered number shortcuts for tags to 
be removed: tags_from_userinput = check_for_possible_shortcuts_in_entered_tags( @@ -2925,9 +3029,10 @@ def main(): ' and height: ' + str(TTY_HEIGHT) + ' (80/80 is the fall-back)') tags_from_userinput = [] if files: - vocabulary = sorted(locate_and_parse_controlled_vocabulary(files[0])) + controlled_vocabulary = sorted(locate_and_parse_controlled_vocabulary(files[0])) else: - vocabulary = sorted(locate_and_parse_controlled_vocabulary(False)) + controlled_vocabulary = sorted(locate_and_parse_controlled_vocabulary(False)) + vocabulary = list(controlled_vocabulary) if len(options.files) < 1 and not (options.tagtrees or options.tagfilter or @@ -3061,7 +3166,8 @@ def main(): logging.debug('derived vocabulary with %i entries' % len(vocabulary)) # using default vocabulary which was generate above # ==================== Interactive asking user for tags ============================= ## - tags_from_userinput = ask_for_tags(vocabulary, upto9_tags_for_shortcuts, tags_for_visual, options.gui) + tags_from_userinput = ask_for_tags(vocabulary, controlled_vocabulary, + upto9_tags_for_shortcuts, tags_for_visual, options.gui) # ==================== Interactive asking user for tags ============================= ## print('') # new line after input for separating input from output @@ -3078,6 +3184,22 @@ def main(): sys.exit(0) logging.debug("tags found: [%s]" % '], ['.join(tags_from_userinput)) + + if options.force_cv and not options.remove and not options.tagfilter and not options.tagtrees and options.tags: + if not controlled_vocabulary: + error_exit(21, "No controlled vocabulary (.filetags) found; --force-cv requires a vocabulary.") + invalid_tags = get_invalid_tags_for_vocabulary(tags_from_userinput, controlled_vocabulary) + if invalid_tags: + logging.error( + colorama.Fore.RED + "Not all tags match the controlled vocabulary " + + colorama.Fore.LIGHTBLACK_EX + "(\"" + str(controlled_vocabulary_filename) + "\")" + + colorama.Style.RESET_ALL + ": " + 
BETWEEN_TAG_SEPARATOR.join(invalid_tags) + ) + similar_msg = build_similar_to_invalid_tags_message(invalid_tags, controlled_vocabulary) + if similar_msg: + logging.info(similar_msg) + sys.exit(22) + if options.remove: logging.info("removing tags \"%s\" ..." % str(BETWEEN_TAG_SEPARATOR.join(tags_from_userinput))) elif options.tagfilter: diff --git a/tests/unit_tests.py b/tests/unit_tests.py index 20dac4a..62e2907 100755 --- a/tests/unit_tests.py +++ b/tests/unit_tests.py @@ -207,6 +207,18 @@ class TestMethods(unittest.TestCase): 'file4 -- common foo bar jodel.txt.lnk'])), set(['common', 'foo'])) + def test_get_invalid_tags_for_vocabulary(self): + vocabulary = ['foo', 'bar', 'baz'] + self.assertEqual(filetags.get_invalid_tags_for_vocabulary(['foo', 'bar'], vocabulary), []) + self.assertEqual(filetags.get_invalid_tags_for_vocabulary(['foo', 'qux'], vocabulary), ['qux']) + self.assertEqual(filetags.get_invalid_tags_for_vocabulary(['-foo', '-qux'], vocabulary), ['-qux']) + self.assertEqual(filetags.get_invalid_tags_for_vocabulary(['aa', 'aa', 'bb', 'aa'], ['bb']), ['aa']) + + def test_build_similar_to_invalid_tags_message(self): + self.assertIsNone(filetags.build_similar_to_invalid_tags_message(['xxx'], ['foo', 'bar'])) + self.assertEqual(filetags.build_similar_to_invalid__tags_message(['Simpson'], ['Simson', 'simpson']), + 'Similar tags: Simpson -> Simson simpson') + def test_extract_tags_from_path(self): self.assertEqual(set(filetags.extract_tags_from_path('/a/path/without/tags')), set([])) self.assertEqual(set(filetags.extract_tags_from_path('/path -- ptag1/with -- ptag1 ptag2/tags')), From 07ae244d43eb16a825a36b074b006d02b6e26fd5 Mon Sep 17 00:00:00 2001 From: Jonathan Neidel Date: Tue, 10 Feb 2026 14:32:37 +0100 Subject: [PATCH 2/5] Fix typo --- filetags/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/filetags/__init__.py b/filetags/__init__.py index 89fa7af..dde5bb2 100755 --- a/filetags/__init__.py +++ b/filetags/__init__.py @@ 
-1916,7 +1916,7 @@ def handle_tag_gardening(vocabulary): print_tag_dict(tags_only_used_once_dict, vocabulary, sort_index=0, print_only_tags_with_similar_tags=False) if vocabulary: - print("\nTags which have similar other tags are probably typos or plural/singular forms of others:\n (first for tags not in vocabulary, second for vocaulary tags)") + print("\nTags which have similar other tags are probably typos or plural/singular forms of others:\n (first for tags not in vocabulary, second for vocabulary tags)") tags_for_comparing = list(set(tag_dict.keys()).union(set(vocabulary))) # unified elements of both lists only_similar_tags_by_alphabet_dict = {key: value for key, value in list(tag_dict.items()) if find_similar_tags(key, tags_for_comparing)} From fc96c273d6339a5bc80db4bd34fb51eb08401760 Mon Sep 17 00:00:00 2001 From: Jonathan Neidel Date: Tue, 10 Feb 2026 14:49:02 +0100 Subject: [PATCH 3/5] Fix gui cancel --- filetags/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/filetags/__init__.py b/filetags/__init__.py index dde5bb2..70739a9 100755 --- a/filetags/__init__.py +++ b/filetags/__init__.py @@ -456,6 +456,7 @@ class TagDialog: self.entry.bind("", self.on_tab) # This binds the TAB key self.entry.bind("", self.on_return) # This binds the RETURN (Enter) key self.entry.bind("", lambda event: self.on_cancel()) # ESC cancels the dialog + self.root.protocol("WM_DELETE_WINDOW", self.on_cancel) self.label = tk.Label(self.root, fg=low_contrast_fg_color, text=f"Complete {str(num_of_vocabulary_entries)} tags with the -key") self.label.pack(pady=(30,0)) @@ -2391,6 +2392,8 @@ def ask_for_tags_gui_version(vocabulary, upto9_tags_for_shortcuts, hint_str, tag # Run the Tkinter main loop root.mainloop() + if guidialog.cancelled: + return False entered_tags = guidialog.entered_tags.strip() logging.debug(f"interactive GUI mode: entered tags: {entered_tags}") return extract_tags_from_argument(entered_tags) From c87ad56077ec045c7f3a148489746ff57b2e079b Mon Sep 17 
00:00:00 2001 From: Jonathan Neidel Date: Tue, 10 Feb 2026 14:49:16 +0100 Subject: [PATCH 4/5] Improve docs --- README.org | 1 + filetags/__init__.py | 3 --- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/README.org b/README.org index 44e335e..f0eebc5 100644 --- a/README.org +++ b/README.org @@ -191,6 +191,7 @@ options: --lu, --list-tags-unknown-to-vocabulary List all file-tags which are found in file names but are not part of .filetags --tag-gardening This is for getting an overview on tags that might require to be renamed (typos, singular/plural, ...). See also http://www.webology.org/2008/v5n3/a58.html + --force-cv Only allow tags that are part of the controlled vocabulary (.filetags) -v, --verbose Enable verbose mode -q, --quiet Enable quiet mode --version Display version and exit diff --git a/filetags/__init__.py b/filetags/__init__.py index 70739a9..f5b82b5 100755 --- a/filetags/__init__.py +++ b/filetags/__init__.py @@ -2421,9 +2421,6 @@ def ask_for_tags(vocabulary, controlled_vocabulary, upto9_tags_for_shortcuts, ta print(colorama.Fore.RED + "No controlled vocabulary (.filetags) found; --force-cv disabled." 
+ colorama.Style.RESET_ALL) force_cv_enabled = False - if force_cv_enabled: - hint_str = hint_str + " (tags must match your controlled vocabulary)" - previous_input = None previous_error = None while True: From 391adeaab05d941c948907f8b59b73988d2c458d Mon Sep 17 00:00:00 2001 From: Jonathan Neidel Date: Wed, 11 Feb 2026 09:50:52 +0100 Subject: [PATCH 5/5] Fix wrong function name --- tests/unit_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit_tests.py b/tests/unit_tests.py index 62e2907..c39d802 100755 --- a/tests/unit_tests.py +++ b/tests/unit_tests.py @@ -216,7 +216,7 @@ class TestMethods(unittest.TestCase): def test_build_similar_to_invalid_tags_message(self): self.assertIsNone(filetags.build_similar_to_invalid_tags_message(['xxx'], ['foo', 'bar'])) - self.assertEqual(filetags.build_similar_to_invalid__tags_message(['Simpson'], ['Simson', 'simpson']), + self.assertEqual(filetags.build_similar_to_invalid_tags_message(['Simpson'], ['Simson', 'simpson']), 'Similar tags: Simpson -> Simson simpson') def test_extract_tags_from_path(self):