From 5517e39fd60526f2df3880e120ab0e6feb9c14eb Mon Sep 17 00:00:00 2001 From: Karl Voit Date: Sat, 29 Jul 2023 22:28:26 +0200 Subject: [PATCH 01/11] README: added xkcd 2173 --- README.org | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.org b/README.org index 032742a..4a12c95 100644 --- a/README.org +++ b/README.org @@ -32,6 +32,8 @@ possible length limitations). - Note that [[https://en.wikipedia.org/wiki/Folder_(computing)#Folder_metaphor]["directories" are to "folders"]] what "files" are to "documents". +https://imgs.xkcd.com/comics/trained_a_neural_net.png (Source: [[https://xkcd.com/2173/][xkcd]]) + ** Why Besides the fact that I am using [[https://en.wikipedia.org/wiki/Iso_date][ISO dates and times]] in file names From 6e4ffedb623e13aa3461ad008af42951325d84ed Mon Sep 17 00:00:00 2001 From: Karl Voit Date: Sat, 29 Jul 2023 22:28:57 +0200 Subject: [PATCH 02/11] README: added xkcd 2173 --- README.org | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.org b/README.org index 4a12c95..32cb3fb 100644 --- a/README.org +++ b/README.org @@ -32,7 +32,7 @@ possible length limitations). - Note that [[https://en.wikipedia.org/wiki/Folder_(computing)#Folder_metaphor]["directories" are to "folders"]] what "files" are to "documents". 
-https://imgs.xkcd.com/comics/trained_a_neural_net.png (Source: [[https://xkcd.com/2173/][xkcd]]) +[[https://imgs.xkcd.com/comics/trained_a_neural_net.png]] (Source: [[https://xkcd.com/2173/][xkcd]]) ** Why From 281ac61c03037264d5b0585038910fff1acdfd72 Mon Sep 17 00:00:00 2001 From: Karl Voit Date: Mon, 28 Aug 2023 19:04:53 +0200 Subject: [PATCH 03/11] added test_tag_file_in_subdir() but it doesn't show issue from #60 --- tests/unit_tests.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/unit_tests.py b/tests/unit_tests.py index d6eb817..d8000e5 100755 --- a/tests/unit_tests.py +++ b/tests/unit_tests.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -# Time-stamp: <2021-04-03 16:16:22 vk> +# Time-stamp: <2023-08-28 18:16:44 vk> # invoke tests using following command line: # ~/src/vktag % PYTHONPATH="~/src/filetags:" tests/unit_tests.py --verbose @@ -746,6 +746,13 @@ class TestHierarchyWithFilesAndFolders(unittest.TestCase): print("FIXXME: test_locate_and_parse_controlled_vocabulary() not implemented yet") + def test_tag_file_in_subdir(self): + + # adding a tag + filetags.handle_file(os.path.join(self.tempdir, 'sub dir 1', 'foo4 -- bar.txt'), + ['testtag'], do_remove=False, do_filter=False, dryrun=False) + self.assertEqual(self.file_exists(os.path.join(self.tempdir, 'sub dir 1', 'foo4 -- bar testtag.txt')), True) + def test_tagtrees_with_tagfilter_and_no_filtertag(self): filetags.generate_tagtrees(directory=self.subdir2, From b0ec6ae8c4dad553b0d14b525bde079928f10eb6 Mon Sep 17 00:00:00 2001 From: Karl Voit Date: Mon, 28 Aug 2023 19:05:48 +0200 Subject: [PATCH 04/11] fixes issue when tagging files outside of current directory; fixes #60 --- filetags/__init__.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/filetags/__init__.py b/filetags/__init__.py index 8193c41..13de6b7 100755 --- a/filetags/__init__.py +++ b/filetags/__init__.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -*- 
coding: utf-8 -*- -PROG_VERSION = "Time-stamp: <2022-01-24 12:38:12 vk>" +PROG_VERSION = "Time-stamp: <2023-08-28 19:03:57 vk>" # TODO: # - fix parts marked with «FIXXME» @@ -865,7 +865,9 @@ def handle_file_and_optional_link(orig_filename, tags, do_remove, do_filter, dry """ num_errors = 0 + original_dir = os.getcwd() logging.debug("handle_file_and_optional_link(\"" + orig_filename + "\") … " + '★' * 20) + logging.debug('handle_file_and_optional_link: original directory = ' + original_dir) if os.path.isdir(orig_filename): logging.warning("Skipping directory \"%s\" because this tool only renames file names." % orig_filename) @@ -975,6 +977,8 @@ def handle_file_and_optional_link(orig_filename, tags, do_remove, do_filter, dry new_filename = handle_file(filename, tags, do_remove, do_filter, dryrun) + logging.debug('handle_file_and_optional_link: switching back to original directory = ' + original_dir) + os.chdir(original_dir) # reset working directory logging.debug("handle_file_and_optional_link(\"" + orig_filename + "\") FINISHED " + '★' * 20) return num_errors, new_filename @@ -1600,6 +1604,7 @@ def locate_file_in_cwd_and_parent_directories(startfile, filename): logging.debug('locate_file_in_cwd_and_parent_directories: called with startfile \"%s\" and filename \"%s\" ..' % (startfile, filename)) + original_dir = os.getcwd() filename_in_startfile_dir = os.path.join(os.path.dirname(os.path.abspath(startfile)), filename) filename_in_startdir = os.path.join(startfile, filename) if startfile and os.path.isfile(startfile) and os.path.isfile(filename_in_startfile_dir): @@ -1640,10 +1645,11 @@ def locate_file_in_cwd_and_parent_directories(startfile, filename): if os.path.isfile(filename_to_look_for): logging.debug('locate_file_in_cwd_and_parent_directories: found \"%s\" in directory \"%s\" ........' 
% (filename, parent_dir)) - os.chdir(starting_dir) + os.chdir(original_dir) return filename_to_look_for parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir)) - os.chdir(starting_dir) + + os.chdir(original_dir) logging.debug('locate_file_in_cwd_and_parent_directories: did NOT find \"%s\" in current directory or any parent directory' % filename) return False @@ -2657,11 +2663,13 @@ def main(): set(tags_intersection_of_files)) logging.debug('deriving upto9_tags_for_shortcuts ...') + logging.debug('files[0] = ' + files[0]) + logging.debug('startdir = ' + os.path.dirname(os.path.abspath(os.path.basename(files[0])))) upto9_tags_for_shortcuts = sorted( get_upto_nine_keys_of_dict_with_highest_value( get_tags_from_files_and_subfolders( startdir=os.path.dirname( - os.path.abspath(files[0]))), + os.path.abspath(os.path.basename(files[0])))), tags_intersection_of_files, omit_filetags_donotsuggest_tags=True)) logging.debug('derived upto9_tags_for_shortcuts') logging.debug('derived vocabulary with %i entries' % len(vocabulary)) # using default vocabulary which was generate above @@ -2712,6 +2720,8 @@ def main(): if not os.path.exists(filename): logging.error('File "' + filename + '" does not exist. Skipping this one …') + logging.debug('problematic filename: ' + filename) + logging.debug('os.getcwd() = ' + os.getcwd()) num_errors += 1 elif is_broken_link(filename): From eee57a13279c45d5af2dbb3699d062c6d4ecb3d4 Mon Sep 17 00:00:00 2001 From: Karl Voit Date: Sat, 30 Sep 2023 10:42:46 +0200 Subject: [PATCH 05/11] README: added rangement link --- README.org | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.org b/README.org index 32cb3fb..602c1c0 100644 --- a/README.org +++ b/README.org @@ -710,6 +710,10 @@ filename-module of [[https://github.com/novoid/Memacs][Memacs]]. Maintenance is limited though. Please notice that my other tools working with tags do not support TagSpaces-style either. 
+- https://forge.chapril.org/tykayn/rangement.git + - An NPM implementation of a subset of GuessFileName (using image exif header), append2name, move2archive + - You probably need to read a bit of French + * How to Thank Me I'm glad you like my tools. If you want to support me: From f885d8843ed8c3499f6fdec83fb7c7d385056d3f Mon Sep 17 00:00:00 2001 From: DeutscheGabanna Date: Sun, 29 Oct 2023 15:06:13 +0100 Subject: [PATCH 06/11] nautilus integration explained --- Integration.org | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/Integration.org b/Integration.org index f3b92c4..f86ad6b 100644 --- a/Integration.org +++ b/Integration.org @@ -132,7 +132,43 @@ If they don't match, following script re-writes ~accels.scm~ with the current ID ** Integrarion into Nautilus -Please do read the great instructions from [[https://github.com/novoid/filetags/issues/45][that thread]]. +Nautilus allows scripts to be run directly from its GUI. The program passes the filenames of all selected files into the script under the variable ~$NAUTILUS_SCRIPT_SELECTED_FILE_PATHS~. We will use it to create our "taggify" script. + +First, check what kind of terminal you have installed and where it is located. In order to do this, run the command: + +: ps -o 'cmd=' -p $(ps -o 'ppid=' -p $$) + +Copy the output of this command and run: + +: nano ~/.local/share/nautilus/scripts/taggify.sh + +The folder where we created our .sh file is where Nautilus usually looks for scripts. 
Paste this into the file: + +: #!/bin/bash +: +: # Declare an array to store file paths +: file_paths=() +: +: # Split the selected file paths into an array +: IFS=$'\n' read -d '' -ra file_paths <<< "$NAUTILUS_SCRIPT_SELECTED_FILE_PATHS" +: +: # Iterate through the array and format the paths +: for file in "${file_paths[@]}"; do +: # Enclose each file path in quotation marks and append it to the array +: formatted_file="\"$file\"" +: formatted_paths+=("$formatted_file") +: done +: +: # Join the array into a single string with space-separated items +: formatted_paths="${formatted_paths[*]}" +: +: # DEBUG - Output the formatted paths to the 'output' file +: # echo "filetags $formatted_paths" > output +: +: # Open new instance of the terminal and run filetags +: /usr/bin/kgx --command="/bin/bash -ci 'source ~/.bashrc && filetags $formatted_paths && read -p \"You can now safely exit the terminal...\"'" + +Replace ~/usr/bin/kgx~ with the terminal name we previously copied. Save the file and make it executable with =chmod +x ~/.local/share/nautilus/scripts/taggify.sh=, otherwise Nautilus will not list it. Now launch Nautilus, select some files and right-click: you should see a "Scripts" entry in the context menu. If you click it, you will be able to click *Taggify* and tag all the selected files at once. 
** Integration into Windows Explorer :PROPERTIES: From 8071a9f44f75979f1c136cc4169b2cfdb15519e0 Mon Sep 17 00:00:00 2001 From: Karl Voit Date: Tue, 31 Oct 2023 11:20:50 +0100 Subject: [PATCH 07/11] Adding the --overwrite option; fixes #65 --- filetags/__init__.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/filetags/__init__.py b/filetags/__init__.py index 13de6b7..fc8d391 100755 --- a/filetags/__init__.py +++ b/filetags/__init__.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -PROG_VERSION = "Time-stamp: <2023-08-28 19:03:57 vk>" +PROG_VERSION = "Time-stamp: <2023-10-31 11:20:37 vk>" # TODO: # - fix parts marked with «FIXXME» @@ -186,6 +186,9 @@ parser.add_argument("-R", "--recursive", dest="recursive", action="store_true", parser.add_argument("-s", "--dryrun", dest="dryrun", action="store_true", help="Enable dryrun mode: just simulate what would happen, do not modify files") +parser.add_argument("--overwrite", dest="overwrite", action="store_true", + help="If a link is about to be created and a previous file/link exists, the old will be deleted if this is enabled.") + parser.add_argument("--hardlinks", dest="hardlinks", action="store_true", help="Use hard links instead of symbolic links. This is ignored on Windows systems. " + "Note that renaming link originals when tagging does not work with hardlinks.") @@ -1000,12 +1003,26 @@ def create_link(source, destination): The command link option "--hardlinks" switches to hardlinks. This is ignored on Windows systems. + If the destination file exists, an error is shown unless the --overwrite + option is used which results in deleting the old file and replacing with + the new link. 
+ @param source: a file name of the source, an existing file @param destination: a file name for the link which is about to be created """ logging.debug('create_link(' + source + ', ' + destination + ') called') + + if os.path.exists(destination): + if options.overwrite: + logging.debug('destination exists and overwrite flag set → deleting old file') + os.remove(destination) + else: + logging.debug('destination exists and overwrite flag is not set → report error to user') + error_exit(21, 'Trying to create new link but found an old file with same name. ' + + 'If you want me to overwrite older files, use the "--overwrite" option. Culprit: ' + destination) + if IS_WINDOWS: # do lnk-files instead of symlinks: shell = win32com.client.Dispatch('WScript.Shell') @@ -1028,6 +1045,7 @@ def create_link(source, destination): os.link(source, destination) except OSError: logging.warning('Due to cross-device links, I had to use a symbolic link as a fall-back for: ' + source) + os.symlink(source, destination) else: # use good old high-performing symbolic links: os.symlink(source, destination) From 3cee1ffba477268cf7182910d69a419614273868 Mon Sep 17 00:00:00 2001 From: Karl Voit Date: Fri, 10 Nov 2023 18:45:08 +0100 Subject: [PATCH 08/11] fix missing --overwrite for tagtrees --- filetags/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/filetags/__init__.py b/filetags/__init__.py index fc8d391..d7f883d 100755 --- a/filetags/__init__.py +++ b/filetags/__init__.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -PROG_VERSION = "Time-stamp: <2023-10-31 11:20:37 vk>" +PROG_VERSION = "Time-stamp: <2023-11-10 18:44:48 vk>" # TODO: # - fix parts marked with «FIXXME» @@ -2034,7 +2034,7 @@ def assert_empty_tagfilter_directory(directory): @param directory: the directory to use as starting directory """ - if options.tagtrees_directory and os.path.isdir(directory) and os.listdir(directory): + if options.tagtrees_directory and 
os.path.isdir(directory) and os.listdir(directory) and not options.overwrite: error_exit(13, 'The given tagtrees directory ' + directory + ' is not empty. Aborting here instead ' + 'of removing its content without asking. Please free it up yourself and try again.') From a7f4d58998e02f53578c9d2dec73f30b5880fc1a Mon Sep 17 00:00:00 2001 From: Karl Voit Date: Sat, 13 Jan 2024 18:29:31 +0100 Subject: [PATCH 09/11] fixed typo --- filetags/__init__.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/filetags/__init__.py b/filetags/__init__.py index d7f883d..0fa67dc 100755 --- a/filetags/__init__.py +++ b/filetags/__init__.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -PROG_VERSION = "Time-stamp: <2023-11-10 18:44:48 vk>" +PROG_VERSION = "Time-stamp: <2024-01-13 18:29:18 vk>" # TODO: # - fix parts marked with «FIXXME» @@ -30,7 +30,7 @@ PROG_VERSION = "Time-stamp: <2023-11-10 18:44:48 vk>" from importlib import import_module -def save_import(library): +def safe_import(library): try: globals()[library] = import_module(library) except ImportError: @@ -46,14 +46,14 @@ import argparse # for handling command line arguments import time import logging import errno # for throwing FileNotFoundError -save_import('operator') # for sorting dicts -save_import('difflib') # for good enough matching words -save_import('readline') # for raw_input() reading from stdin -save_import('codecs') # for handling Unicode content in .tagfiles -save_import('math') # (integer) calculations -save_import('clint') # for config file handling -save_import('itertools') # for calculating permutations of tagtrees -save_import('colorama') # for colorful output +safe_import('operator') # for sorting dicts +safe_import('difflib') # for good enough matching words +safe_import('readline') # for raw_input() reading from stdin +safe_import('codecs') # for handling Unicode content in .tagfiles +safe_import('math') # (integer) calculations 
+safe_import('clint') # for config file handling +safe_import('itertools') # for calculating permutations of tagtrees +safe_import('colorama') # for colorful output if platform.system() == 'Windows': try: import win32com.client @@ -61,7 +61,7 @@ if platform.system() == 'Windows': print("Could not find Python module \"win32com.client\".\nPlease install it, e.g., " + "with \"sudo pip install pypiwin32\".") sys.exit(3) - save_import('pathlib') + safe_import('pathlib') PROG_VERSION_DATE = PROG_VERSION[13:23] # unused: INVOCATION_TIME = time.strftime("%Y-%m-%dT%H:%M:%S", time.localtime()) @@ -2047,7 +2047,7 @@ def assert_empty_tagfilter_directory(directory): # FIXXME 2018-04-04: I guess this is never reached because this script does never rm -r on that directory: check it and add overwrite parameter logging.debug('found old tagfilter directory "%s"; deleting directory ...' % str(directory)) if not options.dryrun: - save_import('shutil') # for removing directories with shutil.rmtree() + safe_import('shutil') # for removing directories with shutil.rmtree() shutil.rmtree(directory) logging.debug('re-creating tagfilter directory "%s" ...' % str(directory)) os.makedirs(directory) @@ -2343,7 +2343,7 @@ def start_filebrowser(directory): logging.debug('user overrides filebrowser with "none". 
Skipping filebrowser alltogether.') return - save_import('subprocess') + safe_import('subprocess') current_platform = platform.system() logging.debug('platform.system() is: [' + current_platform + ']') if current_platform == 'Linux': From da97bb00e58b872576326822c1f937eb2ee10683 Mon Sep 17 00:00:00 2001 From: Karl Voit Date: Fri, 28 Jun 2024 16:57:00 +0200 Subject: [PATCH 10/11] bugfix with lnk files; fixes #69 --- filetags/__init__.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/filetags/__init__.py b/filetags/__init__.py index 0fa67dc..a011465 100755 --- a/filetags/__init__.py +++ b/filetags/__init__.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -PROG_VERSION = "Time-stamp: <2024-01-13 18:29:18 vk>" +PROG_VERSION = "Time-stamp: <2024-06-28 16:55:33 vk>" # TODO: # - fix parts marked with «FIXXME» @@ -507,7 +507,7 @@ def adding_tag_to_filename(filename, tagname): new_filename = os.path.join(dirname, old_basename + BETWEEN_TAG_SEPARATOR + tagname + '.' 
+ extension) else: - new_filename = os.path.join(dirname, basename + BETWEEN_TAG_SEPARATOR + tagname) + new_filename = os.path.join(dirname, basename_without_lnk + BETWEEN_TAG_SEPARATOR + tagname) if is_lnk_file(filename): return new_filename + '.lnk' else: @@ -837,6 +837,8 @@ def split_up_filename(filename, exception_on_file_not_found=False): @param return: filename with absolute path, pathname, basename, basename without the optional ".lnk" extension """ + # logging.debug(f"split_up_filename: called with: {filename= } {exception_on_file_not_found= }") + if not os.path.exists(filename): # This does make sense for splitting up filenames that are about to be created for example: if exception_on_file_not_found: @@ -855,7 +857,9 @@ def split_up_filename(filename, exception_on_file_not_found=False): else: basename_without_lnk = basename - return os.path.join(dirname, basename), dirname, basename, basename_without_lnk + dir_and_basename = os.path.join(dirname, basename) + # logging.debug(f"split_up_filename: returns: {dir_and_basename= } {dirname= } {basename= } {basename_without_lnk= } ") + return dir_and_basename, dirname, basename, basename_without_lnk def handle_file_and_optional_link(orig_filename, tags, do_remove, do_filter, dryrun): From a60aca7c36647ec974609b91c308b81382c96542 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20B=C3=B6hme?= Date: Mon, 26 Aug 2024 12:25:10 +0200 Subject: [PATCH 11/11] feat: add .filetags syntax for including other files --- filetags/__init__.py | 115 ++++++++++++++++++++++++++----------------- tests/unit_tests.py | 89 +++++++++++++++++++++++++++++++-- 2 files changed, 154 insertions(+), 50 deletions(-) diff --git a/filetags/__init__.py b/filetags/__init__.py index a011465..ee23785 100755 --- a/filetags/__init__.py +++ b/filetags/__init__.py @@ -102,6 +102,9 @@ unique_tags = [UNIQUE_TAG_TESTSTRINGS] # list of list which contains tags that DONOTSUGGEST_PREFIX = '#donotsuggest ' do_not_suggest_tags = [] # list of lower-case strings 
+INCLUDE_PREFIX = '#include ' +included_files = [] + DESCRIPTION = "This tool adds or removes simple tags to/from file names.\n\ \n\ Tags within file names are placed between the actual file name and\n\ @@ -1738,58 +1741,78 @@ def locate_and_parse_controlled_vocabulary(startfile): global unique_tags global do_not_suggest_tags + global included_files if filename: - logging.debug('locate_and_parse_controlled_vocabulary: .filetags found: ' + filename) - if os.path.isfile(filename): - logging.debug('locate_and_parse_controlled_vocabulary: found controlled vocabulary') - - tags = [] - with codecs.open(filename, encoding='utf-8') as filehandle: - logging.debug('locate_and_parse_controlled_vocabulary: reading controlled vocabulary in [%s]' % - filename) - global controlled_vocabulary_filename - controlled_vocabulary_filename = filename - for rawline in filehandle: - - if rawline.strip().lower().startswith(DONOTSUGGEST_PREFIX): - # parse and save do not suggest tags: - line = rawline[len(DONOTSUGGEST_PREFIX):].strip().lower() - for tag in line.split(BETWEEN_TAG_SEPARATOR): - do_not_suggest_tags.append(tag) - else: - - # remove everyting after the first hash character (which is a comment separator) - line = rawline.strip().split('#')[0].strip() # split and take everything before the first '#' as new "line" - - if len(line) == 0: - # nothing left, line consisted only of a comment or was empty - continue - - if BETWEEN_TAG_SEPARATOR in line: - ## if multiple tags are in one line, they are mutually exclusive: only has can be set via filetags - logging.debug('locate_and_parse_controlled_vocabulary: found unique tags: %s' % - (line)) - unique_tags.append(line.split(BETWEEN_TAG_SEPARATOR)) - for tag in line.split(BETWEEN_TAG_SEPARATOR): - # *also* append unique tags to general tag list: - tags.append(tag) - else: - tags.append(line) - - logging.debug('locate_and_parse_controlled_vocabulary: controlled vocabulary has %i tags' % - len(tags)) - 
logging.debug('locate_and_parse_controlled_vocabulary: controlled vocabulary has %i groups of unique tags' % - (len(unique_tags) - 1)) - - return tags - else: - logging.debug('locate_and_parse_controlled_vocabulary: controlled vocabulary is a non-existing file') - return [] + return parse_controlled_vocabulary(filename) else: logging.debug('locate_and_parse_controlled_vocabulary: could not derive filename for controlled vocabulary') return [] +def parse_controlled_vocabulary(filename): + """Parses a controlled vocabulary file.""" + files_to_include = [] + + logging.debug('parse_controlled_vocabulary: .filetags found: ' + filename) + if os.path.isfile(filename): + logging.debug('parse_controlled_vocabulary: found controlled vocabulary') + + included_files.append(os.path.realpath(filename)) + + tags = [] + with codecs.open(filename, encoding='utf-8') as filehandle: + logging.debug('parse_controlled_vocabulary: reading controlled vocabulary in [%s]' % + filename) + global controlled_vocabulary_filename + controlled_vocabulary_filename = filename + for rawline in filehandle: + if rawline.strip().lower().startswith(INCLUDE_PREFIX): + file_to_include = rawline.strip().removeprefix(INCLUDE_PREFIX) + current_file_dir = os.path.dirname(filename) + file_path = os.path.realpath(os.path.join(current_file_dir, file_to_include)) + logging.debug('parse_controlled_vocabulary: found include statement for file [%s]' % file_path) + if file_path not in included_files: + files_to_include.append(file_path) + logging.debug('parse_controlled_vocabulary: including file [%s]' % file_path) + + elif rawline.strip().lower().startswith(DONOTSUGGEST_PREFIX): + # parse and save do not suggest tags: + line = rawline[len(DONOTSUGGEST_PREFIX):].strip().lower() + for tag in line.split(BETWEEN_TAG_SEPARATOR): + do_not_suggest_tags.append(tag) + else: + + # remove everyting after the first hash character (which is a comment separator) + line = rawline.strip().split('#')[0].strip() # split and take 
everything before the first '#' as new "line" + + if len(line) == 0: + # nothing left, line consisted only of a comment or was empty + continue + + if BETWEEN_TAG_SEPARATOR in line: + ## if multiple tags are in one line, they are mutually exclusive: only has can be set via filetags + logging.debug('parse_controlled_vocabulary: found unique tags: %s' % + (line)) + unique_tags.append(line.split(BETWEEN_TAG_SEPARATOR)) + for tag in line.split(BETWEEN_TAG_SEPARATOR): + # *also* append unique tags to general tag list: + tags.append(tag) + else: + tags.append(line) + + for file in files_to_include: + tags.extend(parse_controlled_vocabulary(file)) + + logging.debug('parse_controlled_vocabulary: controlled vocabulary has %i tags' % + len(tags)) + logging.debug('parse_controlled_vocabulary: controlled vocabulary has %i groups of unique tags' % + (len(unique_tags) - 1)) + + return tags + else: + logging.debug('parse_controlled_vocabulary: controlled vocabulary is a non-existing file') + return [] + def print_tag_shortcut_with_numbers(tag_list, tags_get_added=True, tags_get_linked=False): """A list of tags from the list are printed to stdout. Each tag diff --git a/tests/unit_tests.py b/tests/unit_tests.py index d8000e5..3e71090 100755 --- a/tests/unit_tests.py +++ b/tests/unit_tests.py @@ -462,9 +462,8 @@ class TestLocateAndParseControlledVocabulary(unittest.TestCase): self.assertEqual(set(cv), set(["foo", "bar", "baz", "tag"])) - def test_include_lines_in_cv(self): + def test_include_lines_in_cv_not_circular(self): """ - FIXXME!!!! This tests does not use the setup from the test class. However, it does use several other util functions defined in this class. Therefore, I set up a different test case here and re-use the util functions. 
@@ -473,13 +472,95 @@ class TestLocateAndParseControlledVocabulary(unittest.TestCase): tmpdir `- subdir1 | - `- .filetags with a reference to subdir2/included_filetags + `- .filetags with a reference to subdir2/included.filetags - subdir2 | `- included_filetags with additional tags """ - pass # FIXXME: implement + tempdir = tempfile.mkdtemp(prefix="TestControlledVocabulary_Include") + print("\ntempdir: " + tempdir + ' <<<' + '#' * 10) + subdir1 = os.path.join(tempdir, "subdir1") + os.makedirs(subdir1) + assert(os.path.exists(subdir1)) + + subdir2 = os.path.join(tempdir, "subdir2") + os.makedirs(subdir2) + assert(os.path.exists(subdir2)) + + include_cv = """ + tag_from_include_before_CV + #include ../subdir2/included.filetags + tag_from_include_after_CV + """ + include_file = os.path.join(subdir1, '.filetags') + self.create_file(include_file, include_cv) + assert(os.path.isfile(include_file)) + + included_cv = 'tag_from_included_CV' + included_file = os.path.join(subdir2, 'included.filetags') + self.create_file(included_file, included_cv) + assert(os.path.isfile(included_file)) + + if platform.system() != 'Windows': + os.sync() + + # setup complete + + cv = filetags.locate_and_parse_controlled_vocabulary(include_file) + self.assertEqual(set(cv), set(["tag_from_include_before_CV", "tag_from_include_after_CV", "tag_from_included_CV"])) + + def test_include_lines_in_cv_circular(self): + """ + This tests does not use the setup from the test class. However, it does use several + other util functions defined in this class. Therefore, I set up a different test + case here and re-use the util functions. 
+ + Setup looks like this: + tmpdir + `- subdir1 + | + `- .filetags with a reference to subdir2/included.filetags + - subdir2 + | + `- included.filetags with additional tags and reference to subdir1/.filetags + """ + tempdir = tempfile.mkdtemp(prefix="TestControlledVocabulary_Include") + print("\ntempdir: " + tempdir + ' <<<' + '#' * 10) + + subdir1 = os.path.join(tempdir, "subdir1") + os.makedirs(subdir1) + assert(os.path.exists(subdir1)) + + subdir2 = os.path.join(tempdir, "subdir2") + os.makedirs(subdir2) + assert(os.path.exists(subdir2)) + + circular1_cv = """ + tag_from_first_before_CV + #include ../subdir2/included.filetags + tag_from_first_after_CV + """ + circular1_file = os.path.join(subdir1, '.filetags') + self.create_file(circular1_file, circular1_cv) + assert(os.path.isfile(circular1_file)) + + circular2_cv = """ + tag_from_second_before_CV + #include ../subdir1/.filetags + tag_from_second_after_CV + """ + circular2_file = os.path.join(subdir2, 'included.filetags') + self.create_file(circular2_file, circular2_cv) + assert(os.path.isfile(circular2_file)) + + if platform.system() != 'Windows': + os.sync() + + # setup complete + + cv = filetags.locate_and_parse_controlled_vocabulary(circular1_file) + self.assertEqual(set(cv), set(["tag_from_first_before_CV", "tag_from_first_after_CV", "tag_from_second_before_CV", "tag_from_second_after_CV"])) class TestFileWithoutTags(unittest.TestCase):