feat: add .filetags syntax for including other files

This commit is contained in:
Moritz Böhme 2024-08-26 12:25:10 +02:00
parent da97bb00e5
commit a60aca7c36
No known key found for this signature in database
GPG key ID: 970C6E89EB0547A9
2 changed files with 154 additions and 50 deletions

View file

@ -102,6 +102,9 @@ unique_tags = [UNIQUE_TAG_TESTSTRINGS] # list of list which contains tags that
# A vocabulary line starting with this prefix lists tags that are stored in do_not_suggest_tags:
DONOTSUGGEST_PREFIX = '#donotsuggest '
do_not_suggest_tags = [] # list of lower-case strings
# A vocabulary line starting with this prefix names another vocabulary file that gets parsed as well:
INCLUDE_PREFIX = '#include '
# Real paths (os.path.realpath) of the vocabulary files parsed so far; consulted before
# including a file so that a file which was already parsed is not included again:
included_files = []
DESCRIPTION = "This tool adds or removes simple tags to/from file names.\n\
\n\
Tags within file names are placed between the actual file name and\n\
@ -1738,58 +1741,78 @@ def locate_and_parse_controlled_vocabulary(startfile):
global unique_tags
global do_not_suggest_tags
global included_files
if filename:
logging.debug('locate_and_parse_controlled_vocabulary: .filetags found: ' + filename)
if os.path.isfile(filename):
logging.debug('locate_and_parse_controlled_vocabulary: found controlled vocabulary')
tags = []
with codecs.open(filename, encoding='utf-8') as filehandle:
logging.debug('locate_and_parse_controlled_vocabulary: reading controlled vocabulary in [%s]' %
filename)
global controlled_vocabulary_filename
controlled_vocabulary_filename = filename
for rawline in filehandle:
if rawline.strip().lower().startswith(DONOTSUGGEST_PREFIX):
# parse and save do not suggest tags:
line = rawline[len(DONOTSUGGEST_PREFIX):].strip().lower()
for tag in line.split(BETWEEN_TAG_SEPARATOR):
do_not_suggest_tags.append(tag)
else:
# remove everyting after the first hash character (which is a comment separator)
line = rawline.strip().split('#')[0].strip() # split and take everything before the first '#' as new "line"
if len(line) == 0:
# nothing left, line consisted only of a comment or was empty
continue
if BETWEEN_TAG_SEPARATOR in line:
## if multiple tags are in one line, they are mutually exclusive: only has can be set via filetags
logging.debug('locate_and_parse_controlled_vocabulary: found unique tags: %s' %
(line))
unique_tags.append(line.split(BETWEEN_TAG_SEPARATOR))
for tag in line.split(BETWEEN_TAG_SEPARATOR):
# *also* append unique tags to general tag list:
tags.append(tag)
else:
tags.append(line)
logging.debug('locate_and_parse_controlled_vocabulary: controlled vocabulary has %i tags' %
len(tags))
logging.debug('locate_and_parse_controlled_vocabulary: controlled vocabulary has %i groups of unique tags' %
(len(unique_tags) - 1))
return tags
else:
logging.debug('locate_and_parse_controlled_vocabulary: controlled vocabulary is a non-existing file')
return []
return parse_controlled_vocabulary(filename)
else:
logging.debug('locate_and_parse_controlled_vocabulary: could not derive filename for controlled vocabulary')
return []
def parse_controlled_vocabulary(filename):
    """Parse a controlled vocabulary file and every file it includes.

    Each line is stripped of surrounding whitespace and then handled
    according to its beginning (prefixes are matched case-insensitively):

    - INCLUDE_PREFIX: the rest of the line is the path of another
      vocabulary file. It is joined with the directory of `filename` and
      parsed after this file has been read completely. A file whose real
      path is already listed in the global `included_files` is not parsed
      again, which also ends circular includes.
    - DONOTSUGGEST_PREFIX: the rest of the line is lower-cased, split at
      BETWEEN_TAG_SEPARATOR and appended to the global
      `do_not_suggest_tags`.
    - anything else: the text after the first '#' is dropped as a
      comment. If the remainder contains BETWEEN_TAG_SEPARATOR, its tags
      are mutually exclusive: they are appended as one group to the
      global `unique_tags` and also added to the returned tag list.
      Otherwise the remainder is a single tag.

    Further side effects: the real path of `filename` is appended to
    `included_files` and the global `controlled_vocabulary_filename` is
    set to `filename`.

    :param filename: path of the vocabulary file to parse
    :return: list of tags found in `filename` followed by the tags of its
             included files; an empty list if `filename` is not an
             existing file
    """
    global controlled_vocabulary_filename

    logging.debug('parse_controlled_vocabulary: .filetags found: %s', filename)
    if not os.path.isfile(filename):
        logging.debug('parse_controlled_vocabulary: controlled vocabulary is a non-existing file')
        return []

    logging.debug('parse_controlled_vocabulary: found controlled vocabulary')
    # Register this file before reading it, so that a file which includes
    # this one again (directly or indirectly) does not restart the parsing.
    included_files.append(os.path.realpath(filename))

    tags = []
    files_to_include = []
    current_file_dir = os.path.dirname(filename)

    with codecs.open(filename, encoding='utf-8') as filehandle:
        logging.debug('parse_controlled_vocabulary: reading controlled vocabulary in [%s]', filename)
        controlled_vocabulary_filename = filename

        for rawline in filehandle:
            stripped_line = rawline.strip()
            lowered_line = stripped_line.lower()

            if lowered_line.startswith(INCLUDE_PREFIX):
                # The prefix was matched on the lower-cased line, so cut it
                # off by length: a case-sensitive removal would leave
                # e.g. '#INCLUDE ' in front of the path.
                file_to_include = stripped_line[len(INCLUDE_PREFIX):].strip()
                file_path = os.path.realpath(os.path.join(current_file_dir, file_to_include))
                logging.debug('parse_controlled_vocabulary: found include statement for file [%s]', file_path)
                if file_path not in included_files and file_path not in files_to_include:
                    files_to_include.append(file_path)
                    logging.debug('parse_controlled_vocabulary: including file [%s]', file_path)

            elif lowered_line.startswith(DONOTSUGGEST_PREFIX):
                # parse and save do not suggest tags; slice the *stripped*
                # line so that leading whitespace cannot shift the cut:
                line = stripped_line[len(DONOTSUGGEST_PREFIX):].strip().lower()
                do_not_suggest_tags.extend(
                    tag for tag in line.split(BETWEEN_TAG_SEPARATOR) if tag)

            else:
                # remove everything after the first hash character (which is a comment separator)
                line = stripped_line.split('#')[0].strip()
                if not line:
                    # nothing left, line consisted only of a comment or was empty
                    continue

                if BETWEEN_TAG_SEPARATOR in line:
                    # if multiple tags are in one line, they are mutually
                    # exclusive: only one of them can be set via filetags
                    logging.debug('parse_controlled_vocabulary: found unique tags: %s', line)
                    # doubled separators would yield empty tags, drop them:
                    tag_group = [tag for tag in line.split(BETWEEN_TAG_SEPARATOR) if tag]
                    unique_tags.append(tag_group)
                    # *also* append unique tags to general tag list:
                    tags.extend(tag_group)
                else:
                    tags.append(line)

    for file_path in files_to_include:
        # An earlier include of this loop may have parsed the file already
        # (two files including the same third one), so check again.
        if file_path in included_files:
            continue
        tags.extend(parse_controlled_vocabulary(file_path))

    # The recursive calls above overwrite the global with the path of an
    # included file; point it back to the file this call was asked to parse.
    controlled_vocabulary_filename = filename

    logging.debug('parse_controlled_vocabulary: controlled vocabulary has %i tags', len(tags))
    logging.debug('parse_controlled_vocabulary: controlled vocabulary has %i groups of unique tags',
                  len(unique_tags) - 1)
    return tags
def print_tag_shortcut_with_numbers(tag_list, tags_get_added=True, tags_get_linked=False):
"""A list of tags from the list are printed to stdout. Each tag

View file

@ -462,9 +462,8 @@ class TestLocateAndParseControlledVocabulary(unittest.TestCase):
self.assertEqual(set(cv), set(["foo", "bar", "baz", "tag"]))
def test_include_lines_in_cv(self):
def test_include_lines_in_cv_not_circular(self):
"""
FIXXME!!!!
This test does not use the setup from the test class. However, it does use several
other util functions defined in this class. Therefore, I set up a different test
case here and re-use the util functions.
@ -473,13 +472,95 @@ class TestLocateAndParseControlledVocabulary(unittest.TestCase):
tmpdir
`- subdir1
|
`- .filetags with a reference to subdir2/included_filetags
`- .filetags with a reference to subdir2/included.filetags
- subdir2
|
`- included_filetags with additional tags
"""
pass # FIXXME: implement
tempdir = tempfile.mkdtemp(prefix="TestControlledVocabulary_Include")
print("\ntempdir: " + tempdir + ' <<<' + '#' * 10)
subdir1 = os.path.join(tempdir, "subdir1")
os.makedirs(subdir1)
assert(os.path.exists(subdir1))
subdir2 = os.path.join(tempdir, "subdir2")
os.makedirs(subdir2)
assert(os.path.exists(subdir2))
include_cv = """
tag_from_include_before_CV
#include ../subdir2/included.filetags
tag_from_include_after_CV
"""
include_file = os.path.join(subdir1, '.filetags')
self.create_file(include_file, include_cv)
assert(os.path.isfile(include_file))
included_cv = 'tag_from_included_CV'
included_file = os.path.join(subdir2, 'included.filetags')
self.create_file(included_file, included_cv)
assert(os.path.isfile(included_file))
if platform.system() != 'Windows':
os.sync()
# setup complete
cv = filetags.locate_and_parse_controlled_vocabulary(include_file)
self.assertEqual(set(cv), set(["tag_from_include_before_CV", "tag_from_include_after_CV", "tag_from_included_CV"]))
def test_include_lines_in_cv_circular(self):
    """
    Check that two vocabulary files which include each other yield the
    tags of both files.

    This test does not use the setup from the test class. However, it does use several
    other util functions defined in this class. Therefore, I set up a different test
    case here and re-use the util functions.

    Setup looks like this:

    tmpdir
     `- subdir1
        |
        `- .filetags with a reference to subdir2/included.filetags
      - subdir2
        |
        `- included.filetags with additional tags and reference to subdir1/.filetags
    """
    # Imported locally so that this method does not depend on the
    # module-level imports of the test file.
    import shutil

    tempdir = tempfile.mkdtemp(prefix="TestControlledVocabulary_Include")
    # Remove the directory tree again, even when an assertion below fails.
    self.addCleanup(shutil.rmtree, tempdir, ignore_errors=True)
    print("\ntempdir: " + tempdir + ' <<<' + '#' * 10)

    subdir1 = os.path.join(tempdir, "subdir1")
    os.makedirs(subdir1)
    self.assertTrue(os.path.exists(subdir1))

    subdir2 = os.path.join(tempdir, "subdir2")
    os.makedirs(subdir2)
    self.assertTrue(os.path.exists(subdir2))

    # first file: refers to the second one
    circular1_cv = """
tag_from_first_before_CV
#include ../subdir2/included.filetags
tag_from_first_after_CV
"""
    circular1_file = os.path.join(subdir1, '.filetags')
    self.create_file(circular1_file, circular1_cv)
    self.assertTrue(os.path.isfile(circular1_file))

    # second file: refers back to the first one, which closes the circle
    circular2_cv = """
tag_from_second_before_CV
#include ../subdir1/.filetags
tag_from_second_after_CV
"""
    circular2_file = os.path.join(subdir2, 'included.filetags')
    self.create_file(circular2_file, circular2_cv)
    self.assertTrue(os.path.isfile(circular2_file))

    if platform.system() != 'Windows':
        os.sync()

    # setup complete

    cv = filetags.locate_and_parse_controlled_vocabulary(circular1_file)
    self.assertEqual(
        set(cv),
        set(["tag_from_first_before_CV", "tag_from_first_after_CV",
             "tag_from_second_before_CV", "tag_from_second_after_CV"]))
class TestFileWithoutTags(unittest.TestCase):