--tagtrees can now be combined with --filter

This commit is contained in:
Karl Voit 2018-07-23 18:43:28 +02:00
parent 842d789327
commit e44d7ddecd
3 changed files with 188 additions and 64 deletions

View file

@ -317,6 +317,7 @@ tags that are most likely typos or abandoned
- 2018-04-25:
- added hints to [[https://github.com/novoid/integratethis][=integratethis=]] to ease the Windows Explorer
integration
- 2018-07-23: =--tagtrees= can now be filtered with =--filter=
** Get the most out of filetags: controlled vocabulary ~.filetags~
:PROPERTIES:

View file

@ -1,6 +1,6 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
PROG_VERSION = "Time-stamp: <2018-06-02 13:58:30 vk>"
PROG_VERSION = "Time-stamp: <2018-07-23 19:38:12 vk>"
# TODO:
# - fix parts marked with «FIXXME»
@ -191,7 +191,7 @@ parser.add_argument("--tagtrees", dest="tagtrees", action="store_true",
"up to a limit of " + str(DEFAULT_TAGTREES_MAXDEPTH) + ". Target directory " +
"can be overridden by --tagtrees-dir. " +
"Please note that this may take long since it relates " +
"exponentially to the number of tags involved. " +
"exponentially to the number of tags involved. Can be combined with --filter. " +
"See also http://Karl-Voit.at/tagstore/ and http://Karl-Voit.at/tagstore/downloads/Voit2012b.pdf")
parser.add_argument("--tagtrees-handle-no-tag",
@ -1986,7 +1986,7 @@ def get_common_tags_from_files(files):
return list(set.intersection(*list_of_tags_per_file))
def generate_tagtrees(directory, maxdepth, ignore_nontagged, nontagged_subdir, link_missing_mutual_tagged_items):
def generate_tagtrees(directory, maxdepth, ignore_nontagged, nontagged_subdir, link_missing_mutual_tagged_items, filtertags=None):
"""
This functions is somewhat sophisticated with regards to the background.
If you're really interested in the whole story behind the
@ -2056,6 +2056,7 @@ def generate_tagtrees(directory, maxdepth, ignore_nontagged, nontagged_subdir, l
@param ignore_nontagged: (bool) if True, non-tagged items are ignored and not linked
@param nontagged_subdir: (string) holds a string containing the sub-directory name to link non-tagged items to
@param link_missing_mutual_tagged_items: (bool) if True, any item that has a missing tag of any unique_tags entry is linked to a separate directory which is auto-generated from the unique_tags set names
@param filtertags: (list) if options.tagfilter is used, this list holds the tags to filter for (AND)
"""
assert_empty_tagfilter_directory(directory)
@ -2084,9 +2085,12 @@ def generate_tagtrees(directory, maxdepth, ignore_nontagged, nontagged_subdir, l
'So it looks like we\'ve got a shot-yourself-in-the-foot situation here … You can imagine that this was not ' + \
'even simple to find and catch while testing for me either. Or was it? Make an educated guess. :-)')
if filtertags:
files = filter_files_matching_tags(files, filtertags)
if len(files) == 0 and not options.recursive:
error_exit(10, 'There is no single file in the current directory "' + os.getcwd() + '". I can\'t create ' + \
'tagtrees from nothing. You gotta give me at least something to work with here, dude.')
'tagtrees from nothing. You gotta give me at least something to work with here, dude.')
# If a controlled vocabulary file is found for the directory where the tagtree
# should be generated for, we link this file to the resulting tagtrees root
@ -2293,6 +2297,7 @@ def start_filebrowser(directory):
current_platform + '\".')
logging.info('Please visit ' + directory + ' to view filtered items.')
def all_files_are_links_to_same_directory(files):
"""
This function returns True when: all files in "files" are links with same
@ -2326,6 +2331,64 @@ def all_files_are_links_to_same_directory(files):
return False
return True
def handle_option_tagtrees(filtertags=None):
    """
    Evaluates the tagtrees-related command line options and triggers the
    tagtrees generation.

    The function reads options.tagtrees_handle_no_tag, options.tagtrees_depth,
    options.tagtrees_directory and options.tagtrees_link_missing_mutual_tagged_items,
    calls generate_tagtrees() with the derived values, calls start_filebrowser()
    on the chosen directory and finally calls successful_exit().

    @param filtertags: (list) if options.tagfilter is used, this list contains the user-entered list of tags to filter for; it is handed over to generate_tagtrees() unchanged
    """

    logging.debug("handling option for tagtrees")

    # Derive the handling of non-tagged items from the command line option.
    # Both values stay False unless the option says otherwise.
    ignore_nontagged = False
    nontagged_subdir = False
    if options.tagtrees_handle_no_tag:
        no_tag_handling = options.tagtrees_handle_no_tag[0]
        if no_tag_handling == 'treeroot':
            # nothing to change: this corresponds to the defaults set above
            logging.debug("options.tagtrees_handle_no_tag found: treeroot (default)")
        elif no_tag_handling == 'ignore':
            logging.debug("options.tagtrees_handle_no_tag found: ignore")
            ignore_nontagged = True
        else:
            # any other value is taken as the name of the sub-directory
            nontagged_subdir = no_tag_handling
            logging.debug("options.tagtrees_handle_no_tag found: use foldername [" +
                          repr(options.tagtrees_handle_no_tag) + "]")

    # Determine the depth of the tagtrees; warn when the user asks for a lot.
    chosen_maxdepth = DEFAULT_TAGTREES_MAXDEPTH
    if options.tagtrees_depth:
        chosen_maxdepth = options.tagtrees_depth[0]
        logging.debug('User overrides the default tagtrees depth to: ' +
                      str(chosen_maxdepth))
        if chosen_maxdepth > 4:
            logging.warning('The chosen tagtrees depth of ' +
                            str(chosen_maxdepth) + ' is rather high.')
            logging.warning('When linking more than a few files, this ' +
                            'might take a long time using many filesystem inodes.')

    # FIXXME 2018-04-04: following 4-lines block re-occurs for options.tagfilter: unify accordingly!
    chosen_tagtrees_dir = TAGFILTER_DIRECTORY
    if options.tagtrees_directory:
        chosen_tagtrees_dir = options.tagtrees_directory[0]
        logging.debug('User overrides the default tagtrees directory to: ' +
                      str(chosen_tagtrees_dir))

    # Generate the tagtrees and measure the wall-clock time it takes.
    started_at = time.time()
    generate_tagtrees(directory=chosen_tagtrees_dir,
                      maxdepth=chosen_maxdepth,
                      ignore_nontagged=ignore_nontagged,
                      nontagged_subdir=nontagged_subdir,
                      link_missing_mutual_tagged_items=options.tagtrees_link_missing_mutual_tagged_items,
                      filtertags=filtertags)
    elapsed_seconds = time.time() - started_at  # float
    if elapsed_seconds > 3:
        # only report the duration when it was noticeably long
        logging.info("Generated tagtrees in %.2f seconds" % elapsed_seconds)

    start_filebrowser(chosen_tagtrees_dir)
    successful_exit()
def successful_exit():
logging.debug("successfully finished.")
sys.stdout.flush()
@ -2358,7 +2421,7 @@ def main():
error_exit(7, "Please don't use that gardening option together with any other option.")
if options.tagfilter and (options.list_tags_by_number or options.list_tags_by_alphabet or
options.tags or options.tagtrees or options.tag_gardening):
options.tags or options.tag_gardening):
error_exit(14, "Please don't use that filter option together with any other option.")
if options.list_tags_by_number and (options.tagfilter or options.list_tags_by_alphabet or
@ -2373,7 +2436,7 @@ def main():
options.list_tags_by_alphabet or options.tagtrees or options.tag_gardening):
error_exit(17, "Please don't use that tags option together with any other option.")
if options.tagtrees and (options.tagfilter or options.list_tags_by_number or
if options.tagtrees and (options.list_tags_by_number or
options.list_tags_by_alphabet or options.tags or options.tag_gardening):
error_exit(18, "Please don't use the tagtrees option together with any other option.")
@ -2436,54 +2499,8 @@ def main():
handle_tag_gardening(vocabulary)
successful_exit()
elif options.tagtrees:
logging.debug("handling option for tagtrees")
# The command line options for tagtrees_handle_no_tag is checked:
ignore_nontagged = False
nontagged_subdir = False
if options.tagtrees_handle_no_tag:
if options.tagtrees_handle_no_tag[0] == 'treeroot':
logging.debug("options.tagtrees_handle_no_tag found: treeroot (default)")
pass # keep defaults
elif options.tagtrees_handle_no_tag[0] == 'ignore':
logging.debug("options.tagtrees_handle_no_tag found: ignore")
ignore_nontagged = True
else:
ignore_nontagged = False
nontagged_subdir = options.tagtrees_handle_no_tag[0]
logging.debug("options.tagtrees_handle_no_tag found: use foldername [" +
repr(options.tagtrees_handle_no_tag) + "]")
chosen_maxdepth = DEFAULT_TAGTREES_MAXDEPTH
if options.tagtrees_depth:
chosen_maxdepth = options.tagtrees_depth[0]
logging.debug('User overrides the default tagtrees depth to: ' +
str(chosen_maxdepth))
if chosen_maxdepth > 4:
logging.warning('The chosen tagtrees depth of ' +
str(chosen_maxdepth) + ' is rather high.')
logging.warning('When linking more than a few files, this ' +
'might take a long time using many filesystem inodes.')
# FIXXME 2018-04-04: following 4-lines block re-occurs for options.tagfilter: unify accordingly!
chosen_tagtrees_dir = TAGFILTER_DIRECTORY
if options.tagtrees_directory:
chosen_tagtrees_dir = options.tagtrees_directory[0]
logging.debug('User overrides the default tagtrees directory to: ' +
str(chosen_tagtrees_dir))
start = time.time()
generate_tagtrees(chosen_tagtrees_dir,
chosen_maxdepth,
ignore_nontagged,
nontagged_subdir,
options.tagtrees_link_missing_mutual_tagged_items)
delta = time.time() - start # it's a float
if delta > 3:
logging.info("Generated tagtrees in %.2f seconds" % delta)
start_filebrowser(chosen_tagtrees_dir)
successful_exit()
elif options.tagtrees and not options.tagfilter:
handle_option_tagtrees()
elif options.interactive or not options.tags:
@ -2507,7 +2524,7 @@ def main():
elif options.tagfilter:
# FIXXME 2018-04-04: following 4-lines block re-occurs for options.tagtagtrees: unify accordingly!
# FIXXME 2018-04-04: following 4-lines block re-occurs for options.tagtrees: unify accordingly!
chosen_tagtrees_dir = TAGFILTER_DIRECTORY
if options.tagtrees_directory:
chosen_tagtrees_dir = options.tagtrees_directory[0]
@ -2598,9 +2615,12 @@ def main():
elif options.interactive:
logging.info("processing tags \"%s\" ..." % str(BETWEEN_TAG_SEPARATOR.join(tags_from_userinput)))
if options.tagfilter and not files:
if options.tagfilter and not files and not options.tagtrees:
assert_empty_tagfilter_directory(chosen_tagtrees_dir)
files = filter_files_matching_tags(get_files_of_directory(os.getcwd()), tags_from_userinput)
elif options.tagfilter and not files and options.tagtrees:
# the combination of tagtrees and tagfilter requires user input of tags which was done above
handle_option_tagtrees(tags_from_userinput)
logging.debug("iterate over files ...")

View file

@ -1,6 +1,6 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Time-stamp: <2018-04-17 19:16:42 karl.voit>
# Time-stamp: <2018-07-23 18:40:23 karl.voit>
# invoke tests using following command line:
# ~/src/vktag % PYTHONPATH="~/src/filetags:" tests/unit_tests.py --verbose
@ -223,7 +223,6 @@ class TestMethods(unittest.TestCase):
self.assertEqual(filetags.extract_iso_datestamp_from_filename('2018-03-18T23.59 foo bar'),
['2018', '03', '18'])
def tearDown(self):
pass
@ -656,8 +655,25 @@ class TestFileWithoutTags(unittest.TestCase):
class TestHierarchyWithFilesAndFolders(unittest.TestCase):
tempdir = None
subdir1 = None
subdir2 = None
def setUp(self):
"""This setup function creates following dir/file structure:
tempdir (via tempfile.mkdtemp())
|_ "foo1 -- bar.txt"
|_ "foo2 -- bar baz.txt"
|_ "foo3 -- baz teststring1.txt"
|_ sub dir 1/
|_ "foo4 -- bar.txt"
|_ "foo5.txt"
|_ "foo6 -- baz teststring1.txt"
|_ "foo7 -- teststring1.txt"
|_ "foo8 -- baz.txt"
|_ "foo9 -- baz bar.txt"
|_ sub dir 2/ (empty)
"""
# create temporary directory:
self.tempdir = tempfile.mkdtemp()
@ -668,16 +684,30 @@ class TestHierarchyWithFilesAndFolders(unittest.TestCase):
self.assertEqual(filetags.get_tags_from_files_and_subfolders(self.tempdir, use_cache=False), {})
# create set of test files:
self.create_tmp_file("foo1 -- bar.txt")
self.create_tmp_file("foo2 -- bar baz.txt")
self.create_tmp_file("foo3 -- bar baz teststring1.txt")
self.create_tmp_file(self.tempdir, "foo1 -- bar.txt")
self.create_tmp_file(self.tempdir, "foo2 -- bar baz.txt")
self.create_tmp_file(self.tempdir, "foo3 -- baz teststring1.txt")
# create first sub directory and its files:
self.subdir1 = os.path.join(self.tempdir, "sub dir 1")
os.makedirs(self.subdir1)
self.create_tmp_file(self.subdir1, "foo4 -- bar.txt")
self.create_tmp_file(self.subdir1, "foo5.txt")
self.create_tmp_file(self.subdir1, "foo6 -- baz teststring1.txt")
self.create_tmp_file(self.subdir1, "foo7 -- teststring1.txt")
self.create_tmp_file(self.subdir1, "foo8 -- baz.txt")
self.create_tmp_file(self.subdir1, "foo9 -- baz bar.txt")
# create second sub directory (empty)
self.subdir2 = os.path.join(self.tempdir, "sub dir 2")
os.makedirs(self.subdir2)
if platform.system() != 'Windows':
os.sync()
def create_tmp_file(self, name):
def create_tmp_file(self, directory, name):
with open(os.path.join(self.tempdir, name), 'w') as outputhandle:
with open(os.path.join(directory, name), 'w') as outputhandle:
outputhandle.write('This is a test file for filetags unit testing')
def file_exists(self, name):
@ -691,7 +721,7 @@ class TestHierarchyWithFilesAndFolders(unittest.TestCase):
def test_get_tags_from_files_and_subfolders(self):
self.assertEqual(filetags.get_tags_from_files_and_subfolders(self.tempdir, use_cache=False),
{'baz': 2, 'bar': 3, 'teststring1': 1})
{'baz': 2, 'bar': 2, 'teststring1': 1})
# FIXXME: write test which tests the cache
@ -707,6 +737,79 @@ class TestHierarchyWithFilesAndFolders(unittest.TestCase):
print("FIXXME: test_locate_and_parse_controlled_vocabulary() not implemented yet")
def test_tagtrees_with_tagfilter_and_no_filtertag(self):
    """
    generate_tagtrees() with filtertags=None: the target directory has to
    contain the tag directories 'bar', 'baz' and 'teststring1' with the
    expected entries plus the 'nontagged_items' directory.
    """
    tagtrees_root = self.subdir2
    filetags.generate_tagtrees(directory=tagtrees_root,
                               maxdepth=5,
                               ignore_nontagged=False,
                               nontagged_subdir='nontagged_items',
                               link_missing_mutual_tagged_items=False,
                               filtertags=None)

    # three tag directories plus the directory for non-tagged items
    self.assertEqual(len(os.listdir(tagtrees_root)), 4)

    # (tag directory, expected entries within that directory)
    expected_content = (
        ('bar', {'baz', 'foo1 -- bar.txt', 'foo2 -- bar baz.txt'}),
        ('baz', {'bar', 'teststring1', 'foo2 -- bar baz.txt', 'foo3 -- baz teststring1.txt'}),
        ('teststring1', {'baz', 'foo3 -- baz teststring1.txt'}),
    )
    for tagname, expected_entries in expected_content:
        tagdir = os.path.join(tagtrees_root, tagname)
        self.assertTrue(os.path.isdir(tagdir))
        self.assertEqual(set(os.listdir(tagdir)), expected_entries)

    self.assertTrue(os.path.isdir(os.path.join(tagtrees_root, 'nontagged_items')))
def test_tagtrees_with_tagfilter_and_one_filtertag(self):
    """
    generate_tagtrees() with filtertags=['teststring1']: no 'bar' directory
    may exist; 'baz' and 'teststring1' only hold the one file carrying the
    filter tag; the 'nontagged_items' directory exists as well.
    """
    tagtrees_root = self.subdir2
    filetags.generate_tagtrees(directory=tagtrees_root,
                               maxdepth=5,
                               ignore_nontagged=False,
                               nontagged_subdir='nontagged_items',
                               link_missing_mutual_tagged_items=False,
                               filtertags=['teststring1'])

    # 'baz', 'teststring1' and 'nontagged_items' according to the checks below
    self.assertEqual(len(os.listdir(tagtrees_root)), 3)

    # the tag 'bar' does not occur on any file that matches the filter
    self.assertFalse(os.path.isdir(os.path.join(tagtrees_root, 'bar')))

    # (tag directory, expected entries within that directory)
    expected_content = (
        ('baz', {'teststring1', 'foo3 -- baz teststring1.txt'}),
        ('teststring1', {'baz', 'foo3 -- baz teststring1.txt'}),
    )
    for tagname, expected_entries in expected_content:
        tagdir = os.path.join(tagtrees_root, tagname)
        self.assertTrue(os.path.isdir(tagdir))
        self.assertEqual(set(os.listdir(tagdir)), expected_entries)

    self.assertTrue(os.path.isdir(os.path.join(tagtrees_root, 'nontagged_items')))
def test_tagtrees_with_tagfilter_and_multiple_filtertags(self):
    """
    generate_tagtrees() with filtertags=['teststring1', 'baz']: the target
    directory holds exactly 'teststring1', 'baz' and 'nontagged_items';
    the content of 'baz' is checked in detail.
    """
    tagtrees_root = self.subdir2
    filetags.generate_tagtrees(directory=tagtrees_root,
                               maxdepth=5,
                               ignore_nontagged=False,
                               nontagged_subdir='nontagged_items',
                               link_missing_mutual_tagged_items=False,
                               filtertags=['teststring1', 'baz'])

    self.assertEqual(set(os.listdir(tagtrees_root)),
                     {'teststring1', 'baz', 'nontagged_items'})

    bar_dir = os.path.join(tagtrees_root, 'bar')
    baz_dir = os.path.join(tagtrees_root, 'baz')
    teststring1_dir = os.path.join(tagtrees_root, 'teststring1')
    nontagged_dir = os.path.join(tagtrees_root, 'nontagged_items')

    self.assertFalse(os.path.isdir(bar_dir))

    self.assertTrue(os.path.isdir(baz_dir))
    self.assertEqual(set(os.listdir(baz_dir)),
                     {'teststring1', 'foo3 -- baz teststring1.txt'})

    # for these two, only the existence as a directory is verified
    self.assertTrue(os.path.isdir(teststring1_dir))
    self.assertTrue(os.path.isdir(nontagged_dir))
def tearDown(self):
if platform.system() != 'Windows':