Add --force-cv flag to limit tags to the controlled vocabulary

Closes #18
This commit is contained in:
Jonathan Neidel 2026-02-10 14:31:01 +01:00
parent 778a2de5a0
commit 35c9edf4aa
2 changed files with 164 additions and 30 deletions

View file

@ -284,6 +284,10 @@ parser.add_argument("--tag-gardening",
help="This is for getting an overview on tags that might require to be renamed (typos, " +
"singular/plural, ...). See also http://www.webology.org/2008/v5n3/a58.html")
# --force-cv: restrict entered tags to the controlled vocabulary (.filetags).
# The flag is stored as options.force_cv; the code that consumes it only
# enforces the restriction when none of options.remove, options.tagfilter
# or options.tagtrees is set.
parser.add_argument("--force-cv",
dest="force_cv", action="store_true",
help="Only allow tags that are part of the controlled vocabulary (.filetags)")
parser.add_argument("-v", "--verbose",
dest="verbose", action="store_true",
help="Enable verbose mode")
@ -398,13 +402,15 @@ class TagDialog:
## FIXXME: Layout can be improved by somebody who knows how to do this.
## E.g.: gray labels left justified, values centered (as they are now)
self.root = root
self.root.title("filetags")
self.vocabulary = vocabulary
num_of_vocabulary_entries = len(vocabulary)
self.entered_tags = ""
self.cancelled = False
self.force_cv_enabled = options.force_cv and not options.remove and not options.tagfilter and not options.tagtrees
low_contrast_fg_color = self.get_soft_foreground(root, 0.6) ## better than hard-coded gray values that interfere with default color schema
# Label for instructions
@ -431,11 +437,17 @@ class TagDialog:
plural = ''
self.label = tk.Label(self.root, text=f"Please enter 🏷 for {str(number_of_files)} file{plural}", width=50)
self.label.pack(pady=(20,0))
# Create an entry widget for input
self.entry = ttk.Entry(self.root, width=40)
self.entry.pack(pady=(0,30))
warning_text = ""
if options.force_cv and not self.vocabulary:
warning_text = "No controlled vocabulary (.filetags) found; --force-cv disabled."
self.error_label = tk.Label(self.root, fg="red", text=warning_text)
self.error_label.pack(pady=(0,10))
# Set focus on the entry field
self.entry.focus_set() # Ensure the cursor is within the entry field on startup
@ -605,15 +617,25 @@ class TagDialog:
## knowledge.
self.entered_tags = self.entry.get().strip()
print(f"Entered Tags: {self.entered_tags}")
self.cancelled = False
tags_for_validation = extract_tags_from_argument(self.entered_tags)
invalid_tags = force_cv_validator(self.force_cv_enabled, tags_for_validation, self.vocabulary)
if invalid_tags:
error_msg = "Invalid tags: " + BETWEEN_TAG_SEPARATOR.join(invalid_tags)
similar_msg = build_similar_to_invalid_tags_message(invalid_tags, self.vocabulary)
if similar_msg:
error_msg += "\n" + similar_msg
self.error_label.config(text=error_msg)
return
# Close the window after submission
self.root.quit()
def on_cancel(self):
# Cancel handler of the dialog: mark the dialog as cancelled first, then
# destroy the root window. The flag is initialised to False in the
# constructor and reset to False on submission.
# NOTE(review): the submit path ends with self.root.quit() while this path
# uses self.root.destroy() -- presumably intentional; confirm with the caller.
self.cancelled = True
self.root.destroy()
def contains_tag(filename, tagname=False):
"""
@ -1614,6 +1636,45 @@ def find_similar_tags(tag, tags):
return close_but_not_exact_matches
def get_invalid_tags_for_vocabulary(tags, vocabulary):
    """
    Determine which of the given tags are missing from the controlled vocabulary.

    A single leading '-' is ignored for the comparison on both sides: it is
    stripped from every vocabulary entry and from every given tag before the
    lookup. Rejected tags are reported exactly as they were passed in
    (including a leading '-'), each only once, in order of first appearance.

    tags: iterable of tag strings to check
    vocabulary: iterable of tag strings forming the controlled vocabulary
    returns: list of the tags that are not part of the vocabulary
    """

    def strip_dash(name):
        # drop one leading '-' if there is one
        return name[1:] if name.startswith('-') else name

    allowed = {strip_dash(entry) for entry in vocabulary}

    # a dict serves as an insertion-ordered set: duplicates collapse onto
    # their first occurrence
    rejected = {}
    for candidate in tags:
        if strip_dash(candidate) not in allowed:
            rejected[candidate] = None
    return list(rejected)
def force_cv_validator(force_cv_enabled, tags_for_validation, vocabulary):
    """
    Check tags against the controlled vocabulary if enforcement is switched on.

    force_cv_enabled: truthy when the vocabulary restriction should be applied
    tags_for_validation: list of tags to check
    vocabulary: the controlled vocabulary to check against
    returns: the non-empty list of tags rejected by
             get_invalid_tags_for_vocabulary(), or None when enforcement is
             off, there are no tags to check, or no tag was rejected
    """
    should_check = force_cv_enabled and tags_for_validation
    if not should_check:
        return None

    offending = get_invalid_tags_for_vocabulary(tags_for_validation, vocabulary)
    # callers test the result for truthiness; an empty result is reported as None
    return offending if offending else None
def build_similar_to_invalid_tags_message(invalid_tags, vocabulary):
    """
    Build a one-line hint listing vocabulary tags similar to each rejected tag.

    For every tag in invalid_tags, find_similar_tags() is asked for matches
    within vocabulary; tags without any match are left out of the hint.

    invalid_tags: iterable of rejected tag strings
    vocabulary: tags to search for similar entries
    returns: a string of the form "Similar tags: <tag> -> <matches>; ..." with
             the matches joined by BETWEEN_TAG_SEPARATOR, or None when no
             rejected tag has a similar entry
    """
    # lazily pair each rejected tag with its matches, preserving input order
    candidates = ((tag, find_similar_tags(tag, vocabulary)) for tag in invalid_tags)
    hints = [
        " -> ".join((tag, BETWEEN_TAG_SEPARATOR.join(matches)))
        for tag, matches in candidates
        if matches
    ]
    if hints:
        return "Similar tags: " + "; ".join(hints)
    return None
def print_tag_dict(tag_dict_reference, vocabulary=False, sort_index=0,
print_similar_vocabulary_tags=False, print_only_tags_with_similar_tags=False):
"""
@ -2238,7 +2299,8 @@ def _get_tag_visual(tags_for_visual=None):
return visual
def ask_for_tags_text_version(vocabulary, upto9_tags_for_shortcuts, hint_str, tag_list, tags_for_visual=None):
def ask_for_tags_text_version(vocabulary, upto9_tags_for_shortcuts, hint_str, tag_list,
tags_for_visual=None, prompt_prefill=None, invalid_tags=None):
"""
Takes a vocabulary and optional up to nine tags for shortcuts and interactively asks
the user to enter tags. Aborts program if no tags were entered. Returns list of
@ -2261,27 +2323,46 @@ def ask_for_tags_text_version(vocabulary, upto9_tags_for_shortcuts, hint_str, ta
readline.parse_and_bind('tab: complete')
completionhint = '; complete %s tags with TAB' % str(len(vocabulary))
if options.force_cv and not options.remove and not options.tagfilter and not options.tagtrees:
completionhint += '; tags must match your controlled vocabulary'
logging.debug("len(files) [%s]" % str(len(options.files)))
logging.debug("files: %s" % str(options.files))
print(" ")
print("Please enter tags" + colorama.Style.DIM + ", separated by \"" +
BETWEEN_TAG_SEPARATOR + "\"; abort with Ctrl-C" +
completionhint + colorama.Style.RESET_ALL)
print(" ")
print(_get_tag_visual(tags_for_visual))
print(" ")
minimal_prompt = invalid_tags is not None
if not minimal_prompt:
print(" ")
if len(upto9_tags_for_shortcuts) > 0:
print_tag_shortcut_with_numbers(hint_str, tag_list)
if invalid_tags:
print(colorama.Fore.RED + "Invalid tags:" + colorama.Style.RESET_ALL +
" " + BETWEEN_TAG_SEPARATOR.join(invalid_tags))
similar_msg = build_similar_to_invalid_tags_message(invalid_tags, vocabulary)
if similar_msg:
print(similar_msg)
if not minimal_prompt:
print("Please enter tags" + colorama.Style.DIM + ", separated by \"" +
BETWEEN_TAG_SEPARATOR + "\"; abort with Ctrl-C" +
completionhint + colorama.Style.RESET_ALL)
print(" ")
print(_get_tag_visual(tags_for_visual))
print(" ")
if len(upto9_tags_for_shortcuts) > 0:
print_tag_shortcut_with_numbers(hint_str, tag_list)
logging.debug("interactive mode: asking for tags ...")
if prompt_prefill:
def _prefill():
readline.insert_text(prompt_prefill)
readline.set_startup_hook(_prefill)
entered_tags = input(colorama.Style.DIM + 'Tags: ' + colorama.Style.RESET_ALL).strip()
readline.set_startup_hook()
return extract_tags_from_argument(entered_tags)
def ask_for_tags_gui_version(vocabulary, upto9_tags_for_shortcuts, hint_str, tag_list, tags_for_visual=None):
def ask_for_tags_gui_version(vocabulary, upto9_tags_for_shortcuts, hint_str, tag_list,
tags_for_visual=None):
"""
Takes a vocabulary and optional up to nine tags for shortcuts and interactively asks
the user to enter tags. Aborts program if no tags were entered. Returns list of
@ -2315,10 +2396,10 @@ def ask_for_tags_gui_version(vocabulary, upto9_tags_for_shortcuts, hint_str, tag
return extract_tags_from_argument(entered_tags)
def ask_for_tags(vocabulary, upto9_tags_for_shortcuts, tags_for_visual=None, gui=None):
def ask_for_tags(vocabulary, controlled_vocabulary, upto9_tags_for_shortcuts, tags_for_visual=None, gui=None):
"""
Wrapper-function for the text-based version and the GUI version:
Takes a vocabulary and optional up to nine tags for shortcuts and interactively asks
the user to enter tags. Aborts program if no tags were entered. Returns list of
entered tags.
@ -2331,16 +2412,39 @@ def ask_for_tags(vocabulary, upto9_tags_for_shortcuts, tags_for_visual=None, gui
hint_str, tag_list = get_tag_shortcut_information(upto9_tags_for_shortcuts,
tags_get_added=(not options.remove and not options.tagfilter),
tags_get_linked=options.tagfilter)
if gui:
tags_from_userinput = ask_for_tags_gui_version(vocabulary, upto9_tags_for_shortcuts, hint_str, tag_list, tags_for_visual)
else:
tags_from_userinput = ask_for_tags_text_version(vocabulary, upto9_tags_for_shortcuts, hint_str, tag_list, tags_for_visual)
if not tags_from_userinput:
logging.info("no tags given, exiting.")
sys.stdout.flush()
sys.exit(0)
else:
force_cv_enabled = options.force_cv and not options.remove and not options.tagfilter and not options.tagtrees
if force_cv_enabled and not controlled_vocabulary:
print(colorama.Fore.RED + "No controlled vocabulary (.filetags) found; --force-cv disabled." + colorama.Style.RESET_ALL)
force_cv_enabled = False
if force_cv_enabled:
hint_str = hint_str + " (tags must match your controlled vocabulary)"
previous_input = None
previous_error = None
while True:
if gui:
tags_from_userinput = ask_for_tags_gui_version(
vocabulary, upto9_tags_for_shortcuts, hint_str, tag_list, tags_for_visual)
else:
tags_from_userinput = ask_for_tags_text_version(
vocabulary, upto9_tags_for_shortcuts, hint_str, tag_list, tags_for_visual,
prompt_prefill=previous_input, invalid_tags=previous_error)
previous_input = BETWEEN_TAG_SEPARATOR.join(tags_from_userinput) if tags_from_userinput else previous_input
if not tags_from_userinput:
logging.info("no tags given, exiting.")
sys.stdout.flush()
sys.exit(0)
validation_error = force_cv_validator(force_cv_enabled, tags_from_userinput, controlled_vocabulary)
if validation_error:
if not gui:
previous_error = validation_error
continue
previous_error = None
if len(upto9_tags_for_shortcuts) > 0:
# check if user entered number shortcuts for tags to be removed:
tags_from_userinput = check_for_possible_shortcuts_in_entered_tags(
@ -2925,9 +3029,10 @@ def main():
' and height: ' + str(TTY_HEIGHT) + ' (80/80 is the fall-back)')
tags_from_userinput = []
if files:
vocabulary = sorted(locate_and_parse_controlled_vocabulary(files[0]))
controlled_vocabulary = sorted(locate_and_parse_controlled_vocabulary(files[0]))
else:
vocabulary = sorted(locate_and_parse_controlled_vocabulary(False))
controlled_vocabulary = sorted(locate_and_parse_controlled_vocabulary(False))
vocabulary = list(controlled_vocabulary)
if len(options.files) < 1 and not (options.tagtrees or
options.tagfilter or
@ -3061,7 +3166,8 @@ def main():
logging.debug('derived vocabulary with %i entries' % len(vocabulary)) # using default vocabulary which was generate above
# ==================== Interactive asking user for tags ============================= ##
tags_from_userinput = ask_for_tags(vocabulary, upto9_tags_for_shortcuts, tags_for_visual, options.gui)
tags_from_userinput = ask_for_tags(vocabulary, controlled_vocabulary,
upto9_tags_for_shortcuts, tags_for_visual, options.gui)
# ==================== Interactive asking user for tags ============================= ##
print('') # new line after input for separating input from output
@ -3078,6 +3184,22 @@ def main():
sys.exit(0)
logging.debug("tags found: [%s]" % '], ['.join(tags_from_userinput))
if options.force_cv and not options.remove and not options.tagfilter and not options.tagtrees and options.tags:
if not controlled_vocabulary:
error_exit(21, "No controlled vocabulary (.filetags) found; --force-cv requires a vocabulary.")
invalid_tags = get_invalid_tags_for_vocabulary(tags_from_userinput, controlled_vocabulary)
if invalid_tags:
logging.error(
colorama.Fore.RED + "Not all tags match the controlled vocabulary " +
colorama.Fore.LIGHTBLACK_EX + "(\"" + str(controlled_vocabulary_filename) + "\")" +
colorama.Style.RESET_ALL + ": " + BETWEEN_TAG_SEPARATOR.join(invalid_tags)
)
similar_msg = build_similar_to_invalid_tags_message(invalid_tags, controlled_vocabulary)
if similar_msg:
logging.info(similar_msg)
sys.exit(22)
if options.remove:
logging.info("removing tags \"%s\" ..." % str(BETWEEN_TAG_SEPARATOR.join(tags_from_userinput)))
elif options.tagfilter:

View file

@ -207,6 +207,18 @@ class TestMethods(unittest.TestCase):
'file4 -- common foo bar jodel.txt.lnk'])),
set(['common', 'foo']))
def test_get_invalid_tags_for_vocabulary(self):
    """Tags outside the vocabulary are reported once, as entered (leading '-' kept)."""
    vocabulary = ['foo', 'bar', 'baz']
    # (tags to check, vocabulary, expected rejected tags)
    cases = (
        (['foo', 'bar'], vocabulary, []),
        (['foo', 'qux'], vocabulary, ['qux']),
        (['-foo', '-qux'], vocabulary, ['-qux']),
        (['aa', 'aa', 'bb', 'aa'], ['bb'], ['aa']),
    )
    for tags, vocab, expected in cases:
        self.assertEqual(filetags.get_invalid_tags_for_vocabulary(tags, vocab), expected)
def test_build_similar_to_invalid_tags_message(self):
    """
    build_similar_to_invalid_tags_message() yields None when nothing in the
    vocabulary resembles the rejected tag, and a "Similar tags: ..." hint
    otherwise.
    """
    # no vocabulary entry resembles 'xxx' -> no hint at all
    self.assertIsNone(filetags.build_similar_to_invalid_tags_message(['xxx'], ['foo', 'bar']))
    # fixed: the function name was misspelled with a double underscore
    # ("invalid__tags"), which raised AttributeError instead of testing anything
    self.assertEqual(filetags.build_similar_to_invalid_tags_message(['Simpson'], ['Simson', 'simpson']),
                     'Similar tags: Simpson -> Simson simpson')
def test_extract_tags_from_path(self):
self.assertEqual(set(filetags.extract_tags_from_path('/a/path/without/tags')), set([]))
self.assertEqual(set(filetags.extract_tags_from_path('/path -- ptag1/with -- ptag1 ptag2/tags')),