mirror of
https://github.com/novoid/filetags.git
synced 2026-02-17 06:14:17 +00:00
1846 lines
82 KiB
Python
Executable file
1846 lines
82 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
PROG_VERSION = "Time-stamp: <2017-11-11 18:20:45 vk>"
|
|
|
|
# TODO:
|
|
# - fix parts marked with «FIXXME»
|
|
# - $HOME/.config/ with default options (e.g., geeqie)
|
|
# - using clint/resource
|
|
# - if not found, write default config with defaults (and comments)
|
|
# - tagfilter: --copy :: copy files instead of creating symlinks
|
|
# - tagfilter: all toggle-cmd line args as special tags: --copy and so forth
|
|
# - e.g., when user enters tag "--copy" when interactively reading tags, handle it like options.copy
|
|
# - overwriting cmd-line arguments (if contradictory)
|
|
# - allow combination of cmd-line tags and interactive tags
|
|
# - they get combined
|
|
# - tagfilter: additional parameter to move matching files to a temporary subfolder
|
|
# - renaming/deleting of symlinks does not modify original files
|
|
# - tagfilter: --recursive :: recursively going into subdirectories and
|
|
# collecting items (into one target directory) for:
|
|
# - adding tags
|
|
# - removing tags
|
|
# - filter
|
|
# - tagfilter: --notags :: do not ask for tags, use all items that got no tag
|
|
# at all
|
|
# - tagfilter: --ignoredirs :: do not symlink/copy directories
|
|
# - tagfilter: --emptytmpdir :: empty temporary directory after the image viewer exits
|
|
# - use "open" to open first(?) file
|
|
|
|
|
|
# ===================================================================== ##
|
|
# You might not want to modify anything below this line if you do not ##
|
|
# know, what you are doing :-) ##
|
|
# ===================================================================== ##
|
|
|
|
import importlib
|
|
|
|
|
|
def save_import(library):
    """
    Imports the module named by library and binds it as a module-level
    global of the same name. If the import fails, an installation hint is
    printed and the program exits with return value 2.

    @param library: string containing the name of the module to import
    @param return: N/A
    """

    try:
        module = importlib.import_module(library)
    except ImportError:
        print("Could not find Python module \"" + library + "\".\nPlease install it, e.g., with \"sudo pip install " + library + "\".")
        sys.exit(2)
    else:
        # only reached on success: make the module reachable by its own name
        globals()[library] = module
|
|
|
|
|
|
import re
|
|
import sys
|
|
import os
|
|
save_import('argparse') # for handling command line arguments
|
|
save_import('time')
|
|
save_import('logging')
|
|
save_import('operator') # for sorting dicts
|
|
save_import('difflib') # for good enough matching words
|
|
save_import('readline') # for raw_input() reading from stdin
|
|
save_import('codecs') # for handling Unicode content in .tagfiles
|
|
save_import('math') # (integer) calculations
|
|
save_import('clint') # for config file handling
|
|
save_import('itertools') # for calculating permutations of tagtrees
|
|
|
|
# characters 13..22 of PROG_VERSION hold the date part of the time-stamp
PROG_VERSION_DATE = PROG_VERSION[13:23]
# unused: INVOCATION_TIME = time.strftime("%Y-%m-%dT%H:%M:%S", time.localtime())
FILENAME_TAG_SEPARATOR = ' -- '  # between the file name and its list of tags
BETWEEN_TAG_SEPARATOR = ' '  # between two tags within the list of tags
CONTROLLED_VOCABULARY_FILENAME = ".filetags"
HINT_FOR_BEING_IN_VOCABULARY_TEMPLATE = ' *'
TAGFILTER_DIRECTORY = os.path.join(os.path.expanduser("~"), ".filetags_tagfilter")
DEFAULT_TAGTREES_MAXDEPTH = 2  # be careful when making this more than 2: exponential growth of time/links with number of tags!
DEFAULT_IMAGE_VIEWER_LINUX = 'geeqie'
TAG_SYMLINK_ORIGINALS_WHEN_TAGGING_SYMLINKS = True

try:
    # the "with" block closes the pipe to the stty child process even when
    # its output can not be parsed
    with os.popen('stty size', 'r') as stty_output:
        TTY_HEIGHT, TTY_WIDTH = [int(x) for x in stty_output.read().split()]
except ValueError:
    # stty printed nothing usable (e.g., no terminal attached): use defaults
    TTY_HEIGHT, TTY_WIDTH = 80, 80

max_file_length = 0  # will be set after iterating over source files

unique_tags = [['teststring1', 'teststring2']]  # list of list which contains tags that are mutually exclusive
# Note: u'teststring1' and u'teststring2' are hard-coded for testing purposes.
#       You might delete them if you don't use my unit test suite.
|
|
|
|
# DESCRIPTION and EPILOG are handed to argparse.ArgumentParser below as
# "description" and "epilog". Both are single string literals continued over
# several source lines by a trailing backslash, so no comment can be placed
# between their lines.
DESCRIPTION = "This tool adds or removes simple tags to/from file names.\n\
\n\
Tags within file names are placed between the actual file name and\n\
the file extension, separated with \"" + FILENAME_TAG_SEPARATOR + "\". Multiple tags are\n\
separated with \"" + BETWEEN_TAG_SEPARATOR + "\":\n\
Update for the Boss" + FILENAME_TAG_SEPARATOR + "projectA" + BETWEEN_TAG_SEPARATOR + "presentation.pptx\n\
2013-05-16T15.31.42 Error message" + FILENAME_TAG_SEPARATOR + "screenshot" + BETWEEN_TAG_SEPARATOR + "projectB.png\n\
\n\
This easy to use tag system has a drawback: for tagging a larger\n\
set of files with the same tag, you have to rename each file\n\
separately. With this tool, this only requires one step.\n\
\n\
Example usages:\n\
" + sys.argv[0] + " --tags=\"presentation projectA\" *.pptx\n\
… adds the tags \"presentation\" and \"projectA\" to all PPTX-files\n\
" + sys.argv[0] + " --tags=\"presentation -projectA\" *.pptx\n\
… adds the tag \"presentation\" to and removes tag \"projectA\" from all PPTX-files\n\
" + sys.argv[0] + " -i *\n\
… ask for tag(s) and add them to all files in current folder\n\
" + sys.argv[0] + " -r draft *report*\n\
… removes the tag \"draft\" from all files containing the word \"report\"\n\
\n\
\n\
This tools is looking for the optional first text file named \".filetags\" in\n\
current and parent directories. Each of its lines is interpreted as a tag\n\
for tag completion. Multiple tags per line are considered mutual exclusive.\n\
\n\
Verbose description: http://Karl-Voit.at/managing-digital-photographs/"

# the version line of EPILOG is built from PROG_VERSION_DATE
EPILOG = u"\n\
:copyright: (c) by Karl Voit <tools@Karl-Voit.at>\n\
:license: GPL v3 or any later version\n\
:URL: https://github.com/novoid/filetag\n\
:bugreports: via github or <tools@Karl-Voit.at>\n\
:version: " + PROG_VERSION_DATE + "\n·\n"
|
|
|
|
|
|
# file names containing tags matches following regular expression
# (raw strings: "\." and "\w" are invalid escape sequences in normal string literals)
FILE_WITH_TAGS_REGEX = re.compile(r"(.+?)" + FILENAME_TAG_SEPARATOR + r"(.+?)(\.(\w+))??$")
FILE_WITH_TAGS_REGEX_FILENAME_INDEX = 1  # component.group(1)
FILE_WITH_TAGS_REGEX_TAGLIST_INDEX = 2  # the tags, still joined in one string
FILE_WITH_TAGS_REGEX_EXTENSION_INDEX = 4  # extension without the leading dot

# splits at the last dot: everything before it / everything after it
FILE_WITH_EXTENSION_REGEX = re.compile(r"(.*)\.(.*)$")
FILE_WITH_EXTENSION_REGEX_FILENAME_INDEX = 1
FILE_WITH_EXTENSION_REGEX_EXTENSION_INDEX = 2

# maps a start directory to the dict of tag counts that was collected for it
cache_of_tags_by_folder = {}
controlled_vocabulary_filename = ''
# handle_file() appends the directory of each file it renames
list_of_symlink_directories = []
|
|
|
|
# Command line interface. The parsed result is stored in the module-level
# name "options", which other functions of this file read directly.
parser = argparse.ArgumentParser(prog=sys.argv[0],
                                 # keep line breaks in EPILOG and such
                                 formatter_class=argparse.RawDescriptionHelpFormatter,
                                 epilog=EPILOG,
                                 description=DESCRIPTION)

# positional arguments: zero or more file names
parser.add_argument(dest="files", metavar='FILE', nargs='*', help='One or more files to tag')

# nargs=1: options.tags is a list holding one string (or None if not given)
parser.add_argument("-t", "--tags",
                    dest="tags",
                    nargs=1,
                    type=str,
                    metavar='"STRING WITH TAGS"',
                    required=False,
                    help="one or more tags (in quotes, separated by spaces) to add/remove")

parser.add_argument("--remove", action="store_true",
                    help="remove tags from (instead of adding to) file name(s)")

parser.add_argument("-i", "--interactive", action="store_true", dest="interactive",
                    help="interactive mode: ask for (a)dding or (r)emoving and name of tag(s)")

parser.add_argument("-R", "--recursive", dest="recursive", action="store_true",
                    help="recursively go through the current directory and all of its subdirectories. Implemented for --tag-gardening and --tagtrees")

parser.add_argument("-s", "--dryrun", dest="dryrun", action="store_true",
                    help="enable dryrun mode: just simulate what would happen, do not modify files")

parser.add_argument("-f", "--filter", dest="tagfilter", action="store_true",
                    help="ask for list of tags and generate links in \"" + TAGFILTER_DIRECTORY + "\" " +
                    "containing symbolic links to all files with matching tags and start the filebrowser")

parser.add_argument("--filebrowser", dest="filebrowser", metavar='PATH_TO_FILEBROWSER',
                    help="use this option to override the tool to view/manage files (for --filter; default: " +
                    DEFAULT_IMAGE_VIEWER_LINUX + ")")

parser.add_argument("--tagtrees", dest="tagtrees", action="store_true",
                    help="This generates nested directories in \"" + TAGFILTER_DIRECTORY + "\" for each combination of tags " +
                    "up to a limit of " + str(DEFAULT_TAGTREES_MAXDEPTH) + ". " +
                    "Please note that this may take long since it relates " +
                    "exponentially to the number of tags involved. " +
                    "See also http://Karl-Voit.at/tagstore/ and http://Karl-Voit.at/tagstore/downloads/Voit2012b.pdf")

# nargs=1: options.tagtrees_handle_no_tag is a list holding one string (or None)
parser.add_argument("--tagtrees-handle-no-tag",
                    dest="tagtrees_handle_no_tag",
                    nargs=1,
                    type=str,
                    metavar='"treeroot" | "ignore" | "FOLDERNAME"',
                    required=False,
                    help="When tagtrees are created, this parameter defines how to handle items that got no tag at all. " +
                    "The value \"treeroot\" is the default behavior: items without a tag are linked to the tagtrees root. " +
                    "The value \"ignore\" will not link any non-tagged items at all. " +
                    "Any other value is interpreted as a folder name within the tagreees which is used to link all non-tagged items to.")

# the following three switches select one of the tag listing modes
parser.add_argument("--ln", "--list-tags-by-number", dest="list_tags_by_number", action="store_true",
                    help="list all file-tags sorted by their number of use")

parser.add_argument("--la", "--list-tags-by-alphabet", dest="list_tags_by_alphabet", action="store_true",
                    help="list all file-tags sorted by their name")

parser.add_argument("--lu", "--list-tags-unknown-to-vocabulary", dest="list_unknown_tags", action="store_true",
                    help="list all file-tags which are found in file names but are not part of .filetags")

parser.add_argument("--tag-gardening", dest="tag_gardening", action="store_true",
                    help="This is for getting an overview on tags that might require to be renamed (typos, " +
                    "singular/plural, ...). See also http://www.webology.org/2008/v5n3/a58.html")

# handle_logging() checks verbose first, so it wins if both switches are given
parser.add_argument("-v", "--verbose", dest="verbose", action="store_true",
                    help="enable verbose mode")

parser.add_argument("-q", "--quiet", dest="quiet", action="store_true",
                    help="enable quiet mode")

parser.add_argument("--version", dest="version", action="store_true",
                    help="display version and exit")

# NOTE: the command line is parsed at this point of the module, not in a function
options = parser.parse_args()
|
|
|
|
|
|
def handle_logging():
    """
    Configures the root logger according to the command line options:
    verbose -> DEBUG including a time stamp, quiet -> ERROR, otherwise INFO.
    Verbose takes precedence over quiet.
    """

    if options.verbose:
        level, log_format = logging.DEBUG, "%(levelname)-8s %(asctime)-15s %(message)s"
    elif options.quiet:
        level, log_format = logging.ERROR, "%(levelname)-8s %(message)s"
    else:
        level, log_format = logging.INFO, "%(levelname)-8s %(message)s"

    logging.basicConfig(level=level, format=log_format)
|
|
|
|
|
|
def error_exit(errorcode, text):
    """
    Logs text as an error and terminates the program.

    @param errorcode: return value the program exits with
    @param text: message handed to logging.error()
    @param return: N/A (does not return)
    """

    # flush pending regular output first so that it precedes the error message
    sys.stdout.flush()
    logging.error(text)

    raise SystemExit(errorcode)
|
|
|
|
|
|
class SimpleCompleter(object):
    """
    Completer for readline which offers the entries of a fixed word list.
    """
    # happily stolen from http://pymotw.com/2/readline/

    def __init__(self, options):
        """
        @param options: iterable of strings that can be completed
        """
        self.options = sorted(options)
        # start with an empty match list so that complete() is safe to call
        # with any state, even before a list was built with state == 0
        self.matches = []

        # removing '-' as a delimiter character in order to be able to use '-tagname' for removing:
        readline.set_completer_delims(readline.get_completer_delims().replace('-', ''))

    def complete(self, text, state):
        """
        Returns the state'th completion candidate for text.

        @param text: the string to complete (may be empty)
        @param state: index of the requested candidate; 0 rebuilds the candidate list
        @param return: the candidate string or None if there is no such candidate
        """
        if state == 0:
            # This is the first time for this text, so build a match list.
            if text:
                self.matches = [s
                                for s in self.options
                                if s and s.startswith(text)]
                logging.debug('%s matches: %s', repr(text), self.matches)
            else:
                self.matches = self.options[:]
                logging.debug('(empty input) matches: %s', self.matches)

        # Return the state'th item from the match list,
        # if we have that many.
        try:
            response = self.matches[state]
        except IndexError:
            response = None
        logging.debug('complete(%s, %s) => %s',
                      repr(text), state, repr(response))
        return response
|
|
|
|
|
|
def contains_tag(filename, tagname=False):
    """
    Checks the basename of filename for tags. With a tagname, the result
    tells whether exactly this tag is part of the tag list. Without a
    tagname, the result tells whether the name has a tag list at all.

    @param filename: an unicode string containing a file name
    @param tagname: (optional) an unicode string containing a tag name
    @param return: True|False
    """

    assert type(filename) is str
    if tagname:
        assert type(tagname) is str

    match = FILE_WITH_TAGS_REGEX.match(os.path.basename(filename))

    if not tagname:
        # no specific tag requested: any tag list counts
        return match is not None

    if not match:
        logging.debug("file [%s] does not match FILE_WITH_TAGS_REGEX" % filename)
        return False

    found_tags = match.group(FILE_WITH_TAGS_REGEX_TAGLIST_INDEX).split(BETWEEN_TAG_SEPARATOR)
    return tagname in found_tags
|
|
|
|
|
|
def extract_tags_from_filename(filename):
    """
    Returns the tags found in the basename of filename. If the name
    carries no tags, an empty list is returned.

    @param filename: an unicode string containing a file name
    @param return: list of tags (possibly empty)
    """

    assert type(filename) is str

    match = FILE_WITH_TAGS_REGEX.match(os.path.basename(filename))
    if match is None:
        return []

    taglist = match.group(FILE_WITH_TAGS_REGEX_TAGLIST_INDEX)
    return taglist.split(BETWEEN_TAG_SEPARATOR)
|
|
|
|
|
|
def adding_tag_to_filename(filename, tagname):
    """
    Returns filename with tagname appended to its tags. The tag is placed
    in front of the file extension (the part after the last dot). A name
    that already contains the tag is returned unchanged.

    @param filename: an unicode string containing a file name
    @param tagname: an unicode string containing a tag name
    @param return: an unicode string of filename containing tagname
    """

    assert type(filename) is str
    assert type(tagname) is str

    dirname, basename = os.path.dirname(filename), os.path.basename(filename)

    # the three cases differ only in the separator that precedes the new tag
    if contains_tag(basename) is False:
        logging.debug("adding_tag_to_filename(%s, %s): no tag found so far" % (filename, tagname))
        separator = FILENAME_TAG_SEPARATOR
    elif contains_tag(basename, tagname):
        logging.debug("adding_tag_to_filename(%s, %s): tag already found in filename" % (filename, tagname))
        return filename
    else:
        logging.debug("adding_tag_to_filename(%s, %s): add as additional tag to existing list of tags" %
                      (filename, tagname))
        separator = BETWEEN_TAG_SEPARATOR

    extension_match = FILE_WITH_EXTENSION_REGEX.match(basename)
    if extension_match:
        stem = extension_match.group(FILE_WITH_EXTENSION_REGEX_FILENAME_INDEX)
        extension = extension_match.group(FILE_WITH_EXTENSION_REGEX_EXTENSION_INDEX)
        new_basename = stem + separator + tagname + '.' + extension
    else:
        # no dot in the name: simply append the tag
        new_basename = basename + separator + tagname

    return os.path.join(dirname, new_basename)
|
|
|
|
|
|
def removing_tag_from_filename(filename, tagname):
    """
    Returns string of file name with tagname removed as tag. If no tag is
    left afterwards, FILENAME_TAG_SEPARATOR is removed as well. A file name
    which does not contain tagname is returned unchanged.

    @param filename: an unicode string containing a file name
    @param tagname: an unicode string containing a tag name
    @param return: an unicode string of filename without tagname
    """

    assert(filename.__class__ == str)
    assert(tagname.__class__ == str)

    if not contains_tag(filename, tagname):
        return filename

    # contains_tag() looks at the basename only, this match sees the whole
    # string; therefore the match may still fail here
    components = re.match(FILE_WITH_TAGS_REGEX, filename)

    if not components:
        logging.debug("file [%s] does not match FILE_WITH_TAGS_REGEX" % filename)
        return filename

    tags = components.group(FILE_WITH_TAGS_REGEX_TAGLIST_INDEX).split(BETWEEN_TAG_SEPARATOR)
    old_filename = components.group(FILE_WITH_TAGS_REGEX_FILENAME_INDEX)
    extension = components.group(FILE_WITH_TAGS_REGEX_EXTENSION_INDEX)
    extension = '.' + extension if extension else ''

    # decide on the tags that are really left over instead of the original
    # number of tags: a duplicated tag must not leave a dangling separator
    # and an empty tagname must not drop a tag it does not name
    remaining_tags = [tag for tag in tags if tag != tagname]

    if not remaining_tags:
        logging.debug("given tagname is the only tag -> remove all tags and FILENAME_TAG_SEPARATOR as well")
        return old_filename + extension

    # still tags left
    return old_filename + FILENAME_TAG_SEPARATOR + \
        BETWEEN_TAG_SEPARATOR.join(remaining_tags) + extension
|
|
|
|
|
|
def extract_tags_from_argument(argument):
    """
    Splits argument at BETWEEN_TAG_SEPARATOR into a list of tags.

    @param argument: string containing one or more tags
    @param return: a list of unicode tags or False if argument is empty
    """

    assert type(argument) is str

    if not argument:
        return False

    return argument.split(str(BETWEEN_TAG_SEPARATOR))
|
|
|
|
|
|
def extract_filenames_from_argument(argument):
    """
    Hands back argument unchanged; placeholder for a future conversion
    of file name arguments.

    @param argument: string containing one or more file names
    @param return: the very same object that was passed as argument
    """

    # FIXXME: works at my computer without need to convertion but add check later on
    filenames = argument
    return filenames
|
|
|
|
|
|
def get_unique_tags_from_filename(filename):
    """
    Returns those tags of filename which are member of a group of
    mutually exclusive tags in "unique_tags". A tag is listed once for
    each group it is member of.

    @param filename: string containing one file name
    @param return: list of found tags
    """

    return [tag
            for tag in extract_tags_from_filename(filename)
            for taggroup in unique_tags
            if tag in taggroup]
|
|
|
|
|
|
def item_contained_in_list_of_lists(item, list_of_lists):
    """
    Looks for the first list within list_of_lists that contains item.

    @param item: item too look for in list_of_lists
    @param list_of_lists: list containing a list of items
    @param return: (item, matching list) or (None, None) if no list contains item
    """

    hits = ((item, candidates) for candidates in list_of_lists if item in candidates)
    return next(hits, (None, None))
|
|
|
|
|
|
def print_item_transition(path, source, destination, transition):
    """
    Prints what happens to an item: its source name, the kind of
    transition and its destination. If the terminal is wide enough for
    two file names of maximum length, one line is used; otherwise source
    and destination are printed on separate lines.

    @param path: string containing the path to the files (currently not used)
    @param source: string of basename of filename before transition
    @param destination: string of basename of filename after transition or target
    @param transision: string which determines type of transision: ("add", "delete", "link")
    @param return: N/A
    """

    known_transitions = {'add': 'renaming',
                         'delete': 'renaming',
                         'link': 'linking'}

    transition_description = known_transitions.get(transition)
    if transition_description is None:
        print("ERROR: print_item_transition(): unknown transition parameter: \"" + transition + "\"")
        transition_description = ''

    description_width = len(transition_description)
    fits_on_one_line = 15 + description_width + (2 * max_file_length) < TTY_WIDTH

    if not fits_on_one_line:
        # for narrow screens (and long file names): split up item source/destination in two lines
        print(" {0:<{width}s} \"{1:s}\"".format(transition_description, source, width=description_width))
        print(" {0:<{width}s} ⤷ \"{1:s}\"".format(' ', destination, width=description_width))
        return

    # probably enough space: screen output with one item per line
    arrow_left = '――'
    arrow_right = '―→'
    print(" {0:<{width}s} {1:s}{2:s}{3:s} {4:s}".format(source, arrow_left, transition_description, arrow_right, destination, width=max_file_length))
|
|
|
|
|
|
def find_unique_alternative_to_file(filename):
    """
    Looks in the directory of filename for an existing file whose name
    starts with the same characters as the basename of filename. The
    basename is shortened from its end, one character at a time, until
    at least one existing file name starts with it.

    @param filename: string containing one file name which does not exist
    @param return: False or filename that starts with same substring within this directory (basename only)
    """

    logging.debug("file type error for file [%s] in folder [%s]: file type: is file? %s - is dir? %s - is link? %s" %
                  (filename, os.getcwd(), str(os.path.isfile(filename)), str(os.path.isdir(filename)), str(os.path.islink(filename))))
    logging.debug("trying to find a unique file starting with the same characters ...")

    path = os.path.dirname(filename) or os.getcwd()

    # The directory listing holds names without path. Therefore only the
    # basename can ever be a prefix of one of them.
    wanted_basename = os.path.basename(filename)

    # get existing filenames of the directory of filename (top level only;
    # a directory that can not be walked results in an empty list):
    existingfilenames = next(os.walk(path), (path, [], []))[2]

    # reduce filename one character by character from the end and see if any
    # existing filename starts with this substring; start with the whole
    # name to match cases where filename is a complete substring:
    for length in range(len(wanted_basename), 0, -1):
        filenamesubstring = wanted_basename[:length]
        matchingfilenames = [existingfilename
                             for existingfilename in existingfilenames
                             if existingfilename.startswith(filenamesubstring)]
        if not matchingfilenames:
            continue

        logging.debug('For substring [%s] I found existing filenames: %s' % (filenamesubstring, str(matchingfilenames)))
        if len(matchingfilenames) > 1:
            logging.debug('Can not use an alternative filename since it is not unique')
            return False
        return matchingfilenames[0]

    return False
|
|
|
|
|
|
def is_nonbroken_symlink_file(filename):
    """
    Returns true if the filename is a non-broken symbolic link and not just an ordinary file. False, for any other case like no file at all.

    @param filename: an unicode string containing a file name
    @param return: boolean
    """

    # isfile() follows symbolic links: it is only True for a link whose
    # target exists and is a regular file. One boolean expression makes sure
    # that every case results in True or False and never in None.
    return os.path.isfile(filename) and os.path.islink(filename)
|
|
|
|
|
|
def get_link_source_file(filename):
    """
    Reads the target of the symbolic link filename, exactly as it is stored
    in the link. filename has to be a symbolic link.

    @param filename: an unicode string containing a file name
    @param return: file path string
    """

    assert os.path.islink(filename)
    link_target = os.readlink(filename)
    return link_target
|
|
|
|
|
|
def is_broken_link(name):
    """
    This function determines if the given name points to a file that is a broken link.
    It returns False for any other cases such as non existing files and so forth.

    @param name: an unicode string containing a file name
    @param return: boolean
    """

    # islink() does not follow the link, exists() does. A broken link is
    # therefore a link which does not "exist". This way, a relative link
    # target is resolved relative to the directory of the link and a name
    # which is no link at all never gets passed to readlink().
    return os.path.islink(name) and not os.path.exists(name)
|
|
|
|
|
|
def split_up_filename(filename):
    """
    Splits filename into its directory and its basename. The directory
    is converted into an absolute path.

    @param filename: an unicode string containing a file name
    @param return: filename with absolute path, pathname, basename
    """

    directory_part, basename = os.path.split(filename)
    dirname = os.path.abspath(directory_part)
    absolute_filename = os.path.join(dirname, basename)
    return absolute_filename, dirname, basename
|
|
|
|
|
|
def handle_file_and_symlink_source_if_found(orig_filename, tags, do_remove, do_filter, dryrun):
    """
    Handles one file via handle_file(). If the file is a non-broken
    symbolic link whose source file has the same basename, the source
    file is handled first and the link is re-created so that it points
    to the renamed source. If the file does not exist, a unique existing
    file starting with the same characters is handled instead.

    Side effect: changes the working directory to the directory of the file.

    @param orig_filename: string containing one file name
    @param tags: list containing one or more tags
    @param do_remove: boolean which defines if tags should be added (False) or removed (True)
    @param do_filter: boolean which is passed on to handle_file()
    @param dryrun: boolean which defines if files should be changed (False) or not (True)
    @param return: new filename, or None if the item was skipped
    """

    logging.debug("handle_file_and_symlink_source_if_found(\"" + orig_filename + "\") … " + '★' * 20)

    if os.path.isdir(orig_filename):
        logging.warning("Skipping directory \"%s\" because this tool only renames file names." % orig_filename)
        return

    filename, dirname, basename = split_up_filename(orig_filename)

    if not (os.path.isfile(filename) or os.path.islink(filename)):
        logging.debug('handle_file_and_symlink_source_if_found: this is no regular file nor a symlink; looking for an alternative file that starts with same substring …')

        # try to find unique alternative file:
        alternative_filename = find_unique_alternative_to_file(filename)

        if not alternative_filename:
            logging.debug('handle_file_and_symlink_source_if_found: Could not locate alternative basename that starts with same substring')
            logging.error("Skipping \"%s\" because this tool only renames existing file names." % filename)
            return
        else:
            logging.info("Could not find basename \"%s\" but found \"%s\" instead which starts with same substring ..." %
                         (filename, alternative_filename))
            # continue with the file that was found, not with the missing
            # one; a bare basename gets located in the directory of the original
            filename, dirname, basename = split_up_filename(os.path.join(dirname, alternative_filename))

    if dirname:
        logging.debug("handle_file_and_symlink_source_if_found: changing to dir \"%s\"" % dirname)
        os.chdir(dirname)
    else:
        logging.debug("handle_file_and_symlink_source_if_found: no dirname found")

    # if basename is a symbolic link and has same basename, tag the source file as well:
    if TAG_SYMLINK_ORIGINALS_WHEN_TAGGING_SYMLINKS and is_nonbroken_symlink_file(filename):
        logging.debug('handle_file_and_symlink_source_if_found: file is a non-broken symlink and TAG_SYMLINK_ORIGINALS_WHEN_TAGGING_SYMLINKS is set')

        # a relative link target gets resolved against the working directory,
        # which is the directory of the link at this point
        old_source_filename, old_source_dirname, old_source_basename = split_up_filename(get_link_source_file(basename))

        if old_source_basename == basename:
            logging.debug('handle_file_and_symlink_source_if_found: symlink "' + filename +
                          '" has same basename as its source file "' + old_source_filename + '"')

            handled_source = handle_file_and_symlink_source_if_found(old_source_filename, tags, do_remove, do_filter, dryrun)

            if not handled_source:
                # the source was skipped or no new name was reported
                logging.debug('handle_file_and_symlink_source_if_found: The old sourcefilename "' + old_source_filename +
                              '" did not change. So therefore I don\'t re-link.')
            else:
                # the recursive call returns a path: compare basenames only
                new_source_basename = os.path.basename(handled_source)
                new_source_filename = os.path.join(old_source_dirname, new_source_basename)

                if old_source_basename != new_source_basename:
                    logging.debug('handle_file_and_symlink_source_if_found: Tagging the symlink-destination file of "' + basename + '" ("' +
                                  old_source_filename + '") as well …')

                    if dryrun:
                        logging.debug('handle_file_and_symlink_source_if_found: I would re-link the old sourcefilename "'
                                      + old_source_filename +
                                      '" to the new one "' + new_source_filename + '"')
                    else:
                        logging.debug('handle_file_and_symlink_source_if_found: re-linking symlink "' + os.path.join(dirname, basename) +
                                      '" from the old sourcefilename "' +
                                      old_source_filename + '" to the new one "' + new_source_filename + '"')
                        os.remove(filename)
                        os.symlink(new_source_filename, filename)
                else:
                    logging.debug('handle_file_and_symlink_source_if_found: The old sourcefilename "' + old_source_filename +
                                  '" did not change. So therefore I don\'t re-link.')
        else:
            logging.debug('handle_file_and_symlink_source_if_found: The file "' + filename +
                          '" is a symlink to "' + old_source_filename +
                          '" but they two do have different basenames. Therefore I ignore the original file.')
        os.chdir(dirname)  # go back to original dir after handling symlinks of different directories
    else:
        logging.debug('handle_file_and_symlink_source_if_found: file is not a non-broken symlink (' +
                      repr(is_nonbroken_symlink_file(basename)) + ') or TAG_SYMLINK_ORIGINALS_WHEN_TAGGING_SYMLINKS is not set')

    # after handling potential symlink originals, I now handle the file we were talking about in the first place:
    new_filename = handle_file(filename, tags, do_remove, do_filter, dryrun)

    return new_filename
|
|
|
|
|
|
def handle_file(orig_filename, tags, do_remove, do_filter, dryrun):
    """
    In filter mode, links the file into TAGFILTER_DIRECTORY. Otherwise,
    adds the given tags to or removes them from the file name and renames
    the file accordingly. Empty tag names are ignored.

    @param orig_filename: string containing one file name with absolute path
    @param tags: list containing one or more tags
    @param do_remove: boolean which defines if tags should be added (False) or removed (True)
    @param do_filter: boolean which defines if the file gets linked to TAGFILTER_DIRECTORY (True) instead of being renamed (False)
    @param dryrun: boolean which defines if files should be changed (False) or not (True)
    @param return: new filename, or None in filter mode
    """

    assert(orig_filename.__class__ == str)
    assert(tags.__class__ == list)
    if do_remove:
        assert(do_remove.__class__ == bool)
    if do_filter:
        assert(do_filter.__class__ == bool)
    if dryrun:
        assert(dryrun.__class__ == bool)

    filename, dirname, basename = split_up_filename(orig_filename)

    logging.debug("handle_file(\"" + filename + "\") … with woring dir \"" + os.getcwd() + "\"")

    if do_filter:
        print_item_transition(dirname, basename, TAGFILTER_DIRECTORY, transition='link')
        if not dryrun:
            os.symlink(filename, os.path.join(TAGFILTER_DIRECTORY, basename))
        # nothing gets renamed in filter mode, so there is no new filename
        return None

    # add or remove tags:
    new_basename = basename
    logging.debug('handle_file: set new_basename [' + new_basename + '] according to parameters (initialization)')

    for tagname in tags:
        if not tagname:
            # e.g., caused by two consecutive separators in the tag string
            logging.debug('handle_file: ignoring empty tag name')
            continue

        if do_remove:
            new_basename = removing_tag_from_filename(new_basename, tagname)
            logging.debug('handle_file: set new_basename [' + new_basename + '] when do_remove')
        elif tagname.startswith('-'):
            tag_to_remove = tagname[1:]
            if tag_to_remove:  # a single '-' does not name any tag
                new_basename = removing_tag_from_filename(new_basename, tag_to_remove)
            logging.debug('handle_file: set new_basename [' + new_basename + '] when tag starts with a minus')
        else:
            # FIXXME: not performance optimized for large number of unique tags in many lists:
            tag_in_unique_tags, matching_unique_tag_list = item_contained_in_list_of_lists(tagname, unique_tags)

            if tagname != tag_in_unique_tags:
                new_basename = adding_tag_to_filename(new_basename, tagname)
                logging.debug('handle_file: set new_basename [' + new_basename + '] when tagname != tag_in_unique_tags')
            else:
                # if tag within unique_tags found, and new unique tag is given, remove old tag:
                # e.g.: unique_tags = (u'yes', u'no') -> if 'no' should be added, remove existing tag 'yes' (and vice versa)
                # If user enters contradicting tags, only the last one will be applied.
                # FIXXME: this is an undocumented feature -> please add proper documentation

                current_filename_tags = extract_tags_from_filename(new_basename)
                conflicting_tags = list(set(current_filename_tags).intersection(matching_unique_tag_list))
                logging.debug("handle_file: found unique tag %s which require old unique tag(s) to be removed: %s" % (tagname, repr(conflicting_tags)))
                for conflicting_tag in conflicting_tags:
                    new_basename = removing_tag_from_filename(new_basename, conflicting_tag)
                    logging.debug('handle_file: set new_basename [' + new_basename + '] when conflicting_tag in conflicting_tags')
                new_basename = adding_tag_to_filename(new_basename, tagname)
                logging.debug('handle_file: set new_basename [' + new_basename + '] after adding_tag_to_filename()')

    new_filename = os.path.join(dirname, new_basename)

    if do_remove:
        transition = 'delete'
    else:
        transition = 'add'

    if basename != new_basename:

        list_of_symlink_directories.append(dirname)

        if len(list_of_symlink_directories) > 1:
            logging.debug('new_filename is a symlink. Screen output of transistion gets postponed to later on.')
        elif not options.quiet:
            print_item_transition(dirname, basename, new_basename, transition=transition)

        if not dryrun:
            os.rename(filename, new_filename)

    return new_filename
|
|
|
|
|
|
def add_tag_to_countdict(tag, tags):
    """
    Takes a tag (string) and a dict. Returns the dict with count value increased by one.
    The dict is modified in place and returned for convenience.

    @param tag: a (unicode) string
    @param tags: dict of tags
    @param return: dict of tags with incremented counter of tag (or 1 if new)
    """

    assert(tag.__class__ == str)
    assert(tags.__class__ == dict)

    # one dict lookup instead of building a list of all keys for each call
    tags[tag] = tags.get(tag, 0) + 1

    return tags
|
|
|
|
|
|
def get_tags_from_files_and_subfolders(startdir=None, use_cache=True):
    """
    Collects the tags of the file and directory names found in startdir
    and counts their occurrences.

    Only the entries directly within startdir are looked at, unless
    options.recursive is set together with one of list_tags_by_alphabet,
    list_tags_by_number, list_unknown_tags or tag_gardening; in that case
    the whole hierarchy below startdir gets traversed.

    @param startdir: existing directory to start with; if None, the working directory at call time is taken
    @param use_cache: if True, the result is read from and written to cache_of_tags_by_folder
    @param return: dict of tags and their number of occurrence
    """

    if startdir is None:
        # Resolved here and not in the signature: a default of os.getcwd()
        # is evaluated only once when the function gets defined and turns
        # stale as soon as the working directory changes.
        startdir = os.getcwd()

    assert(os.path.isdir(startdir))

    global cache_of_tags_by_folder

    logging.debug('get_tags_from_files_and_subfolders called with startdir [%s], cached startdirs [%s]' % (startdir, str(len(list(cache_of_tags_by_folder.keys())))))

    if use_cache and startdir in cache_of_tags_by_folder:
        logging.debug("found " + str(len(cache_of_tags_by_folder[startdir])) + " tags in cache for directory: " + startdir)
        return cache_of_tags_by_folder[startdir]

    tags = {}

    # Enable recursive directory traversal for specific options only:
    traverse_recursively = options.recursive and (options.list_tags_by_alphabet or
                                                  options.list_tags_by_number or
                                                  options.list_unknown_tags or
                                                  options.tag_gardening)

    for root, dirs, files in os.walk(startdir):

        # logging.debug('get_tags_from_files_and_subfolders: root [%s]' % root)  # LOTS of debug output

        for filename in files:
            for tag in extract_tags_from_filename(filename):
                tags = add_tag_to_countdict(tag, tags)

        for dirname in dirs:
            for tag in extract_tags_from_filename(dirname):
                tags = add_tag_to_countdict(tag, tags)

        if not traverse_recursively:
            break  # do not loop: the first os.walk() item is startdir itself

    logging.debug("Writing " + str(len(list(tags.keys()))) + " tags in cache for directory: " + startdir)
    if use_cache:
        cache_of_tags_by_folder[startdir] = tags
    return tags
|
|
|
|
|
|
def find_similar_tags(tag, tags):
    """
    Looks up the entries of tags which are close matches of tag without
    being identical to it.

    @param tag: a (unicode) string that represents a tag
    @param tags: a list of (unicode) strings
    @param return: list of tags that are similar to tag
    """

    assert(tag.__class__ == str)
    assert(tags.__class__ == list)

    # the close matches may contain tag itself; exact matches are omitted
    return [candidate
            for candidate in difflib.get_close_matches(tag, tags, n=999, cutoff=0.7)
            if candidate != tag]
|
|
|
|
|
|
def print_tag_dict(tag_dict_reference, vocabulary=False, sort_index=0, print_similar_vocabulary_tags=False, print_only_tags_with_similar_tags=False):
    """
    Takes a dictionary which holds tag names and their occurrence and prints it to stdout.
    Tags that appear also in the vocabulary get marked in the output.

    An empty dictionary results in the table header without any rows.

    @param tag_dict_reference: a dictionary holding tags and their occurrence number
    @param vocabulary: array of tags from controlled vocabulary or False
    @param sort_index: index within the (tag, count) pairs to sort by: 0 for the tag, 1 for the count
    @param print_similar_vocabulary_tags: if a vocabulary is given, print the similar tags found for each tag
    @param print_only_tags_with_similar_tags: if True, only tags with at least one similar tag are printed
    """

    tag_dict = tag_dict_reference

    # determine maximum length of strings for formatting; default=0 keeps
    # max() from raising a ValueError when the dict is empty:
    maxlength_tags = max((len(s) for s in tag_dict), default=0) + len(HINT_FOR_BEING_IN_VOCABULARY_TEMPLATE)
    maxlength_count = max(len(str(abs(max(tag_dict.values(), default=0)))), 5)

    if vocabulary:
        print("\n (Tags marked with \"" + HINT_FOR_BEING_IN_VOCABULARY_TEMPLATE + "\" appear in your vocabulary.)")
    print("\n {0:{1}} : {2:{3}}".format('count', maxlength_count, 'tag', maxlength_tags))
    print(" " + '-' * (maxlength_tags + maxlength_count + 7))

    look_for_similar_tags = bool(vocabulary) and print_similar_vocabulary_tags
    if look_for_similar_tags:
        # unified elements of both lists; it stays the same for all tags
        # so it is computed once instead of once per printed row
        tags_for_comparing = list(set(tag_dict.keys()).union(set(vocabulary)))

    # sort dict of (tag, count) according to sort_index
    for tag, count in sorted(tag_dict.items(), key=operator.itemgetter(sort_index)):

        if vocabulary and tag in vocabulary:
            hint_for_being_in_vocabulary = HINT_FOR_BEING_IN_VOCABULARY_TEMPLATE
        else:
            hint_for_being_in_vocabulary = ''

        similar_tags_list = []
        similar_tags = ''
        if look_for_similar_tags:
            similar_tags_list = find_similar_tags(tag, tags_for_comparing)
            if similar_tags_list:
                similar_tags = ' (similar to: ' + ', '.join(similar_tags_list) + ')'

        if similar_tags_list or not print_only_tags_with_similar_tags:
            print(" {0:{1}} : {2:{3}} {4}".format(count, maxlength_count, tag + hint_for_being_in_vocabulary, maxlength_tags, similar_tags))

    print('')
|
|
|
|
|
|
def print_tag_set(tag_set, vocabulary=False, print_similar_vocabulary_tags=False):
    """
    Takes a set which holds tag names and prints it to stdout in sorted order.
    Tags that appear also in the vocabulary get marked in the output.

    An empty set results in no tag rows being printed.

    @param tag_set: a set holding tags
    @param vocabulary: array of tags from controlled vocabulary or False
    @param print_similar_vocabulary_tags: if a vocabulary is given and tags are similar to it, print a list of them
    """

    # determine maximum length of strings for formatting; default=0 keeps
    # max() from raising a ValueError when the set is empty:
    maxlength_tags = max((len(s) for s in tag_set), default=0) + len(HINT_FOR_BEING_IN_VOCABULARY_TEMPLATE)

    if vocabulary:
        print("\n (Tags marked with \"" + HINT_FOR_BEING_IN_VOCABULARY_TEMPLATE + "\" appear in your vocabulary.)\n")

    look_for_similar_tags = bool(vocabulary) and print_similar_vocabulary_tags
    if look_for_similar_tags:
        # unified elements of both lists; it stays the same for all tags
        # so it is computed once instead of once per printed tag
        tags_for_comparing = list(tag_set.union(set(vocabulary)))

    for tag in sorted(tag_set):

        if vocabulary and tag in vocabulary:
            hint_for_being_in_vocabulary = HINT_FOR_BEING_IN_VOCABULARY_TEMPLATE
        else:
            hint_for_being_in_vocabulary = ''

        similar_tags = ''
        if look_for_similar_tags:
            similar_tags_list = find_similar_tags(tag, tags_for_comparing)
            if similar_tags_list:
                similar_tags = ' (similar to: ' + ', '.join(similar_tags_list) + ')'

        print(" {0:{1}} {2}".format(tag + hint_for_being_in_vocabulary, maxlength_tags, similar_tags))

    print('')
|
|
|
|
|
|
def list_unknown_tags(file_tag_dict):
    """
    Prints those tags of file_tag_dict which are not part of the controlled
    vocabulary returned by locate_and_parse_controlled_vocabulary(). If all
    tags are part of the vocabulary, a corresponding note is printed instead.

    @param file_tag_dict: dict of tags (found in file names) and their number of occurrence
    @param return: dict holding the entries of file_tag_dict whose tag is not in the vocabulary
    """

    vocabulary = locate_and_parse_controlled_vocabulary(False)

    # keep only the entries whose tag is unknown to the vocabulary
    unknown_tag_dict = {}
    for tag, count in file_tag_dict.items():
        if tag not in vocabulary:
            unknown_tag_dict[tag] = count

    if not unknown_tag_dict:
        number_of_tags = str(len(file_tag_dict))
        size_of_vocabulary = str(len(vocabulary))
        print("\n " + number_of_tags + " different tags were found in file names which are all" +
              " part of your .filetags vocabulary (consisting of " + size_of_vocabulary + " tags).\n")
        return unknown_tag_dict

    print_tag_dict(unknown_tag_dict, vocabulary)
    return unknown_tag_dict
|
|
|
|
|
|
def handle_tag_gardening(vocabulary):
    """
    This method is quite handy to find tags that might contain typos or do not
    differ much from other tags. You might want to rename them accordingly.

    Tags are gathered from the file system.

    Tags that appear also in the vocabulary get marked in the output.

    Sections without any matching tag are omitted from the output.

    @param vocabulary: array containing the controlled vocabulary (or False)
    @param return: -
    """

    tag_dict = get_tags_from_files_and_subfolders(startdir=os.getcwd())
    if not tag_dict:
        print("\nNo file containing tags found in this folder hierarchy.\n")
        return

    print("\nYou have used " + str(len(tag_dict)) + " tags in total.\n")

    used_tags = set(tag_dict.keys())
    # vocabulary may be False which must not be handed over to set():
    vocabulary_tags = set(vocabulary) if vocabulary else set()

    if vocabulary:

        print('\nYour controlled vocabulary is defined in ' + controlled_vocabulary_filename + ' and contains ' + str(len(vocabulary)) + ' tags.\n')

        vocabulary_tags_not_used = vocabulary_tags - used_tags
        if vocabulary_tags_not_used:
            print("\nTags from your vocabulary which you didn't use:\n")
            print_tag_set(vocabulary_tags_not_used)

        tags_not_in_vocabulary = used_tags - vocabulary_tags
        if tags_not_in_vocabulary:
            print("\nTags you used that are not in the vocabulary:\n")
            print_tag_set(tags_not_in_vocabulary)

    tags_only_used_once_dict = {key: value for key, value in tag_dict.items() if value < 2}
    if tags_only_used_once_dict:
        # guarded because there is nothing to print for an empty dict
        print("\nTags that appear only once are most probably typos or you have forgotten them:")
        print_tag_dict(tags_only_used_once_dict, vocabulary, sort_index=0, print_only_tags_with_similar_tags=False)

    tags_for_comparing = list(used_tags.union(vocabulary_tags))  # unified elements of both lists
    only_similar_tags_by_alphabet_dict = {key: value for key, value in tag_dict.items() if find_similar_tags(key, tags_for_comparing)}
    if only_similar_tags_by_alphabet_dict:
        print("\nTags which have similar other tags are probably typos or plural/singular forms of others:")
        print_tag_dict(only_similar_tags_by_alphabet_dict, vocabulary, sort_index=0, print_similar_vocabulary_tags=True)

    tags_in_both_outputs = set(tags_only_used_once_dict.keys()).intersection(only_similar_tags_by_alphabet_dict.keys())

    if tags_in_both_outputs:
        print("\nIf tags appear in both lists from above (only once and similar to others), they most likely\nrequire your attention:")
        print_tag_set(tags_in_both_outputs, vocabulary=vocabulary, print_similar_vocabulary_tags=True)
|
|
|
|
|
|
def locate_file_in_cwd_and_parent_directories(startfile, filename):
    """This method looks for the filename in the folder of startfile and its
    parent folders. It returns the path of the first matching file found.

    The working directory of the process is not changed by the search.

    @param startfile: file or directory whose path is the starting point; if False (or not existing), the working path is taken
    @param filename: string of file name to look for
    @param return: absolute path of the file found or False if there is none
    """

    if startfile and os.path.isfile(startfile):
        starting_dir = os.path.dirname(os.path.abspath(startfile))
        logging.debug('startfile [%s] found, using it as starting_dir [%s] ....' % (str(startfile), starting_dir))
    elif startfile and os.path.isdir(startfile):
        starting_dir = os.path.abspath(startfile)
        logging.debug('startfile [%s] is a directory, using it as starting_dir [%s] .....' % (str(startfile), starting_dir))
    else:
        starting_dir = os.getcwd()
        logging.debug('no startfile found; using cwd as starting_dir [%s] ......' % (starting_dir))

    # Walk from starting_dir up to the file system root by path arithmetic
    # only; calling os.chdir() for that would alter the working directory
    # for the rest of the program.
    current_dir = starting_dir
    while True:
        logging.debug('looking for \"%s\" in directory \"%s\" .......' % (filename, current_dir))
        filename_to_look_for = os.path.join(current_dir, filename)
        if os.path.isfile(filename_to_look_for):
            logging.debug('found \"%s\" in directory \"%s\" ........' % (filename, current_dir))
            # the full path is returned since the bare file name would only
            # be valid if current_dir happens to be the working directory
            return filename_to_look_for

        parent_dir = os.path.dirname(current_dir)
        if parent_dir == current_dir:
            # the root directory is its own parent: nothing left to look at
            break
        current_dir = parent_dir

    logging.debug('did NOT find \"%s\" in current directory or any parent directory' % filename)
    return False
|
|
|
|
|
|
def locate_and_parse_controlled_vocabulary(startfile):
    """This method is looking for files named
    CONTROLLED_VOCABULARY_FILENAME in the directory of startfile (and its
    parent directories) and parses it. Each line contains a tag which gets
    read in for tab completion. Empty lines are ignored.

    A line holding multiple tags, separated by BETWEEN_TAG_SEPARATOR,
    defines a group of mutually exclusive tags. Such a group is added to
    the global unique_tags and its tags are part of the result as well.

    The global controlled_vocabulary_filename is set to the file that was read.

    @param startfile: file whose location is the starting point of the search; if False, the working directory is taken
    @param return: list of found tag strings (empty if no vocabulary file could be found)
    """

    global unique_tags
    global controlled_vocabulary_filename

    if startfile:
        filename = locate_file_in_cwd_and_parent_directories(startfile, CONTROLLED_VOCABULARY_FILENAME)
    else:
        filename = locate_file_in_cwd_and_parent_directories(os.getcwd(), CONTROLLED_VOCABULARY_FILENAME)

    if not filename:
        logging.debug('locate_and_parse_controlled_vocabulary: could not derive filename for controlled vocabulary in folder of startfile')
        return []

    if not os.path.isfile(filename):
        logging.debug('locate_and_parse_controlled_vocabulary: could not find controlled vocabulary in folder of startfile')
        return []

    logging.debug('locate_and_parse_controlled_vocabulary: found controlled vocabulary in folder of startfile')
    tags = []
    with codecs.open(filename, encoding='utf-8') as filehandle:
        logging.debug('locate_and_parse_controlled_vocabulary: reading controlled vocabulary in [%s]' % filename)
        controlled_vocabulary_filename = filename
        for rawline in filehandle:
            line = rawline.strip()
            if not line:
                # an empty line must not end up as an empty tag
                continue
            if BETWEEN_TAG_SEPARATOR in line:
                ## if multiple tags are in one line, they are mutually exclusive: only one of them can be set via filetags
                logging.debug('locate_and_parse_controlled_vocabulary: found unique tags: %s' % (line))
                # repeated separators would yield empty strings; drop them
                group_of_unique_tags = [tag for tag in line.split(BETWEEN_TAG_SEPARATOR) if tag]
                if group_of_unique_tags not in unique_tags:
                    # this function may be called more than once; do not
                    # pile up the same group in the global list
                    unique_tags.append(group_of_unique_tags)
                # *also* append unique tags to general tag list:
                tags.extend(group_of_unique_tags)
            else:
                tags.append(line)

    logging.debug('locate_and_parse_controlled_vocabulary: controlled vocabulary has %i tags' % len(tags))
    logging.debug('locate_and_parse_controlled_vocabulary: controlled vocabulary has %i groups of unique tags' % (len(unique_tags) - 1))
    return tags
|
|
|
|
|
|
def print_tag_shortcut_with_numbers(tag_list, tags_get_added=True, tags_get_linked=False):
    """Prints a headline followed by the tags of the list to stdout. Each
    tag is followed by a number in parentheses which corresponds to its
    position in the list (although starting with 1).

    The headline depends on the two flags and on whether the list holds
    fewer than nine tags.

    @param tag_list: list of string holding the tags
    @param tags_get_added: True if tags get added, False otherwise
    @param tags_get_linked: True if tags are used for linking; only looked at if tags_get_added is False
    @param return: -
    """

    # pairs of (headline for fewer than nine tags, headline otherwise)
    if tags_get_added:
        headlines = ("Previously used tags in this directory:",
                     "Top nine previously used tags in this directory:")
    elif tags_get_linked:
        headlines = ("Used tags in this directory:",
                     "Top nine used tags in this directory:")
    else:
        headlines = ("Possible tags to be removed:",
                     "Top nine possible tags to be removed:")

    hint_string = headlines[0] if len(tag_list) < 9 else headlines[1]
    print("\n " + hint_string)

    list_of_tag_hints = [tag + ' (' + str(position) + ')'
                         for position, tag in enumerate(tag_list, start=1)]
    print(' ' + ' ⋅ '.join(list_of_tag_hints))
    print('')  # newline at end
|
|
|
|
|
|
def check_for_possible_shortcuts_in_entered_tags(usertags, list_of_shortcut_tags):
    """
    Replaces entered number shortcuts by the tags they stand for.

    An entered tag is taken as a sequence of shortcuts only if each of its
    characters is a number from 1 up to the number of shortcut tags. Any
    other entered tag (containing a non-number character, a zero or a
    number without a corresponding shortcut tag) is kept as it is.

    An entered tag which is already part of the result is not added again.

    @param usertags: list of entered tags from the user, e.g., [u'23']
    @param list_of_shortcut_tags: list of possible shortcut tags, e.g., [u'bar', u'folder1', u'baz']
    @param return: list of tags which were meant by the user, e.g., [u'folder1', u'baz']
    """

    assert(usertags.__class__ == list)
    assert(list_of_shortcut_tags.__class__ == list)

    foundtags = []  # collect all found tags which are about to return from this function
    number_of_shortcuts = len(list_of_shortcut_tags)

    for currenttag in usertags:

        if currenttag in foundtags:
            # already part of the result; continue with the next tag from the user
            continue

        try:
            indexes = [int(character) for character in currenttag]
        except ValueError:
            # ValueError tells us that one character is not an integer. Therefore, the whole currenttag is a valid tag:
            logging.debug('whole tag is a normal tag')
            foundtags.append(currenttag)
            continue

        # The lower bound is checked explicitly: for a "0", the index
        # "0 - 1" would not raise an IndexError but silently select the
        # last shortcut tag.
        if all(1 <= index <= number_of_shortcuts for index in indexes):
            logging.debug('adding shortcut tags of number(s) %s' % currenttag)
            foundtags.extend(list_of_shortcut_tags[index - 1] for index in indexes)
        else:
            # at least one number is not a valid shortcut. Therefore, the
            # whole currenttag is a valid tag and not a set of shortcuts:
            logging.debug('whole tag is a normal tag')
            foundtags.append(currenttag)

    return foundtags
|
|
|
|
|
|
def get_upto_nine_keys_of_dict_with_highest_value(mydict, list_of_tags_to_omit=[]):
    """
    Ranks the keys of a dict by their values (highest first), removes the
    keys to omit, takes up to nine keys from the top and returns them as
    a sorted list.

    Example1: { "key2":45, "key1": 33} -> [ "key1", "key2" ]
    Example2: { "key2":45, "key1": 33, "key3": 99} list_of_tags_to_omit=["key3"] -> [ "key1", "key2" ]

    @param mydict: dictionary holding keys and values
    @param list_of_tags_to_omit: list of strings that should not be part of the returned list
    @param return: sorted list of up to top nine keys according to the rank of their values
    """

    assert(mydict.__class__ == dict)

    # highest value first; entries with the same value keep their dict order
    ranked_entries = sorted(mydict.items(), key=lambda entry: entry[1], reverse=True)
    ranked_keys = [key for key, _ in ranked_entries]

    logging.debug("get_upto_nine_keys_of_dict_with_highest_value: complete_list: " + ", ".join(ranked_keys))
    if list_of_tags_to_omit:
        logging.debug("get_upto_nine_keys_of_dict_with_highest_value: omitting tags: " + ", ".join(list_of_tags_to_omit))
        remaining_keys = []
        for key in ranked_keys:
            if key not in list_of_tags_to_omit:
                remaining_keys.append(key)
        ranked_keys = remaining_keys

    top_keys = ranked_keys[:9]
    top_keys.sort()
    return top_keys
|
|
|
|
|
|
def _get_tag_visual(tags_for_visual=None):
|
|
"""
|
|
Returns a visual representation of a tag. If the optional tags_for_visual
|
|
is given, write the list of those tags into to the tag as well.
|
|
|
|
@param tags_for_visual: list of strings with tags
|
|
@param return: string with a multi-line representation of a visual tag
|
|
"""
|
|
|
|
if not tags_for_visual:
|
|
tags = " ? "
|
|
else:
|
|
tags = BETWEEN_TAG_SEPARATOR.join(sorted(tags_for_visual))
|
|
|
|
length = len(tags)
|
|
visual = " .---" + '-' * length + "--, \n" + \
|
|
" | o " + tags + " | \n" + \
|
|
" `---" + '-' * length + "--' "
|
|
|
|
return visual
|
|
|
|
|
|
def ask_for_tags(vocabulary, upto9_tags_for_shortcuts, tags_for_visual=None):
    """
    Interactively asks the user to enter tags. If a vocabulary is given,
    its tags are offered for completion with the tab key. If shortcut tags
    are given, they are printed with their numbers and entered numbers get
    resolved to the corresponding tags.

    Exits the program (exit code 0) if no tags were entered.

    @param vocabulary: array containing the controlled vocabulary
    @param upto9_tags_for_shortcuts: array of tags which can be used to generate number-shortcuts
    @param tags_for_visual: optional list of tags that is shown within the visual tag
    @param return: list of tags entered by the user
    """

    has_vocabulary = bool(vocabulary) and len(vocabulary) > 0
    has_shortcuts = len(upto9_tags_for_shortcuts) > 0

    completionhint = ''
    if has_vocabulary:

        assert(vocabulary.__class__ == list)

        # Register our completer function and use the tab key for completion
        readline.set_completer(SimpleCompleter(vocabulary).complete)
        readline.parse_and_bind('tab: complete')

        completionhint = '; complete %s tags with TAB' % str(len(vocabulary))

    logging.debug("len(files) [%s]" % str(len(options.files)))
    logging.debug("files: %s" % str(options.files))

    prompt_hint = "Please enter tags, separated by \"" + BETWEEN_TAG_SEPARATOR + "\"; abort with Ctrl-C" + completionhint

    print(" ")
    print(prompt_hint)
    print(" ")
    print(_get_tag_visual(tags_for_visual))
    print(" ")

    if has_shortcuts:
        print_tag_shortcut_with_numbers(upto9_tags_for_shortcuts,
                                        tags_get_added=(not options.remove and not options.tagfilter),
                                        tags_get_linked=options.tagfilter)

    logging.debug("interactive mode: asking for tags ...")
    entered_tags = input('Tags: ').strip()
    tags_from_userinput = extract_tags_from_argument(entered_tags)

    if not tags_from_userinput:
        logging.info("no tags given, exiting.")
        sys.stdout.flush()
        sys.exit(0)
    elif has_shortcuts:
        # check if user entered number shortcuts instead of tags:
        return check_for_possible_shortcuts_in_entered_tags(tags_from_userinput, upto9_tags_for_shortcuts)
    else:
        return tags_from_userinput
|
|
|
|
|
|
def get_files_of_directory(directory):
    """
    Collects the files of the given directory. With options.recursive, the
    files of all sub-directories are collected as well and each entry is
    joined with the path of the directory it was found in; otherwise the
    plain file names of the given directory are returned.

    @param directory: string of an existing directory
    @param return: list of file names of given directory
    """

    collected_files = []
    for current_dir, _subdirs, names in os.walk(directory):
        if not options.recursive:
            # the first item of os.walk() is the directory itself; stop after it
            collected_files.extend(names)
            break
        collected_files.extend(os.path.join(current_dir, name) for name in names)

    return collected_files
|
|
|
|
|
|
def filter_files_matching_tags(allfiles, tags):
    """
    Returns a list of file names that contain all given tags.

    @param allfiles: array of file names
    @param tags: array of tags
    @param return: list of file names that contain all tags
    """

    # the set of required tags is the same for every file: build it once
    required_tags = set(tags)

    return [x for x in allfiles if required_tags.issubset(extract_tags_from_filename(x))]
|
|
|
|
|
|
def assert_empty_tagfilter_directory(directory):
    """
    Makes sure that the tagfilter directory exists and is empty: a missing
    directory gets created, an existing one gets deleted and re-created.

    With options.dryrun, the file system is not modified.

    @param directory: the directory to create or to empty
    """

    directory_exists = os.path.isdir(directory)

    if directory_exists:
        logging.debug('found old tagfilter directory "%s"; deleting directory ...' % str(directory))
    else:
        logging.debug('creating non-existent tagfilter directory "%s" ...' % str(directory))

    if options.dryrun:
        return

    if directory_exists:
        save_import('shutil')  # for removing directories with shutil.rmtree()
        shutil.rmtree(directory)
        logging.debug('re-creating tagfilter directory "%s" ...' % str(directory))

    os.makedirs(directory)
    assert(os.path.isdir(directory))
|
|
|
|
|
|
def get_common_tags_from_files(files):
    """
    Returns a list of tags that are common (intersection) for all files.

    @param files: array of file names
    @param return: list of tags; empty if no file is given
    """

    list_of_tags_per_file = [set(extract_tags_from_filename(currentfile)) for currentfile in files]

    if not list_of_tags_per_file:
        # set.intersection() raises a TypeError when called without any set
        return []

    return list(set.intersection(*list_of_tags_per_file))
|
|
|
|
|
|
def generate_tagtrees(directory, maxdepth, ignore_nontagged, nontagged_subdir):
|
|
"""
|
|
This functions is somewhat sophisticated with regards to the background.
|
|
If you're really interested in the whole story behind the
|
|
visualization/navigation of tags using tagtrees, feel free to read [my
|
|
PhD thesis] about it on [the tagstore webpage]. It is surely a piece of
|
|
work I am proud of and the general chapters of it are written so that
|
|
the average person is perfectly well able to follow.
|
|
|
|
In short: this function takes the files of the current directory and
|
|
generates hierarchies up to level of `$maxdepth' (by default 2) of all
|
|
combinations of tags, [linking] all files according to their tags.
|
|
|
|
Consider having a file like:
|
|
|
|
┌────
|
|
│ My new car -- car hardware expensive.jpg
|
|
└────
|
|
|
|
Now you generate the tagtrees, you'll find [links] to this file within
|
|
`~/.filetags', the default target directory: `new/' and `hardware/' and
|
|
`expensive/' and `new/hardware/' and `new/expensive/' and
|
|
`hardware/new/' and so on. You get the idea.
|
|
|
|
Therefore, within the folder `new/expensive/' you will find all files
|
|
that have at least the tags "new" and "expensive" in any order. This is
|
|
/really/ cool to have.
|
|
|
|
Files of the current directory that don't have any tag at all, are
|
|
linked directly to `~/.filetags' so that you can find and tag them
|
|
easily.
|
|
|
|
I personally, do use this feature within my image viewer of choice
|
|
([geeqie]). I mapped it to `Shift-T' because `Shift-t' is occupied by
|
|
`filetags' for tagging of course. So when I am within my image viewer
|
|
and I press `Shift-T', tagtrees of the currently shown images are
|
|
created. Then an additional image viewer window opens up for me, showing
|
|
the resulting tagtrees. This way, I can quickly navigate through the tag
|
|
combinations to easily interactively filter according to tags.
|
|
|
|
Please note: when you are tagging linked files within the tagtrees with
|
|
filetags, only the current link gets updated with the new name. All
|
|
other links to this modified filename within the other directories of
|
|
the tagtrees gets broken. You have to re-create the tagtrees to update
|
|
all the links after tagging files.
|
|
|
|
|
|
[my PhD thesis] http://Karl-Voit.at/tagstore/downloads/Voit2012b.pdf
|
|
|
|
[the tagstore webpage] http://Karl-Voit.at/tagstore/
|
|
|
|
[linking] https://en.wikipedia.org/wiki/Symbolic_link
|
|
|
|
[links] https://en.wikipedia.org/wiki/Symbolic_link
|
|
|
|
[geeqie] http://geeqie.sourceforge.net/
|
|
|
|
Valid combinations for ignore_nontagged and nontagged_subdir are:
|
|
|
|
| ignore_nontagged | nontagged_subdir | results in ... |
|
|
|------------------+------------------+-------------------------------------------------------------------|
|
|
| False | False | non-linked items are linked to tagtrees root |
|
|
| False | <a string> | non-linked items are linked to a tagtrees folder named <a string> |
|
|
| True | False | non-linked items are ignored |
|
|
|
|
@param directory: the directory to use as starting directory
|
|
@param maxdepth: integer which holds the depth to which the tagtrees are generated; keep short to avoid HUGE execution times!
|
|
@param ignore_nontagged: (bool) if True, non-tagged items are ignored and not linked
|
|
@param nontagged_subdir: (string) holds a string containing the sub-directory name to link non-tagged items to
|
|
"""
|
|
import pdb; pdb.set_trace()
|
|
assert_empty_tagfilter_directory(directory)
|
|
|
|
# The boolean ignore_nontagged must be "False" when nontagged_subdir holds a value:
|
|
# valid combinations:
|
|
assert((ignore_nontagged and not nontagged_subdir) or
|
|
(not ignore_nontagged and (not nontagged_subdir or type(nontagged_subdir)==str)))
|
|
|
|
# Extract the variables nontagged_item_dest_dir from the valid combinations
|
|
# of nontagged_subdir and ignore_nontagged:
|
|
nontagged_item_dest_dir = False # ignore non-tagged items
|
|
if nontagged_subdir:
|
|
nontagged_item_dest_dir = os.path.join(directory, nontagged_subdir)
|
|
assert_empty_tagfilter_directory(nontagged_item_dest_dir)
|
|
elif not ignore_nontagged:
|
|
nontagged_item_dest_dir = directory
|
|
|
|
try:
|
|
files = get_files_of_directory(os.getcwd())
|
|
except FileNotFoundError:
|
|
error_exit(11, 'When trying to look for files, I could not even find the current working directory. ' + \
|
|
'Could it be the case that you\'ve tried to generate tagtrees within the directory "' + directory + '"? ' + \
|
|
'This would be a pity because filetags tends to delete and re-create this directory on each call of this feature. ' + \
|
|
'Therefore, this directory does not exist after starting filetags and cleaning up the old content of it. ' + \
|
|
'So it looks like we\'ve got a shot-yourself-in-the-foot situation here … You can imagine that this was not ' + \
|
|
'even simple to find and catch while testing for me either. Or was it? Make an educated guess. :-)')
|
|
|
|
if len(files) == 0 and not options.recursive:
|
|
error_exit(10, 'There is no single file in the current directory "' + os.getcwd() + '". I can\'t create ' + \
|
|
'tagtrees from nothing. You gotta give me at least something to work with here, dude.')
|
|
|
|
# If a controlled vocabulary file is found for the directory where the tagtree
|
|
# should be generated for, we link this file to the resulting tagtrees root
|
|
# directory as well. This way, adding tags using tag completion also works for
|
|
# the linked items.
|
|
controlled_vocabulary_filename = locate_file_in_cwd_and_parent_directories(os.getcwd(), CONTROLLED_VOCABULARY_FILENAME)
|
|
if controlled_vocabulary_filename:
|
|
logging.debug('I found controlled_vocabulary_filename "' + controlled_vocabulary_filename + '" which I\'m going to link to the tagtrees folder')
|
|
if not options.dryrun:
|
|
os.symlink(os.path.abspath(controlled_vocabulary_filename), os.path.join(directory, CONTROLLED_VOCABULARY_FILENAME))
|
|
else:
|
|
logging.debug('I did not find a controlled_vocabulary_filename')
|
|
|
|
logging.info('Creating tagtrees and their symlinks. It may take a while … (exponentially with respect to number of tags)')
|
|
|
|
tags = get_tags_from_files_and_subfolders(startdir=os.getcwd(), use_cache=True)
|
|
|
|
# Here, we define a small helper function within a function. Cool,
|
|
# heh? Bet many folks are not aware of those nifty things I know of ;-P
|
|
def create_tagtrees_dir(basedirectory, tagpermutation):
|
|
"Creates (empty) directories of the tagtrees directory structure"
|
|
|
|
current_directory = os.path.join(basedirectory, *[x for x in tagpermutation]) # flatten out list of permutations to elements
|
|
# logging.debug('generate_tagtrees: mkdir ' + current_directory)
|
|
if not options.dryrun and not os.path.exists(current_directory):
|
|
os.makedirs(current_directory)
|
|
|
|
# this generates a list whose elements (the tags) corresponds to
|
|
# the filenames in the files list:
|
|
tags_of_files = [extract_tags_from_filename(x) for x in files]
|
|
|
|
# Firstly, let's iterate over the files, create tagtree
|
|
# directories according to the set of tags from the current file
|
|
# to avoid empty tagtree directories. Then we're going to link the
|
|
# file to its tagtree directories. I'm confident that this is
|
|
# going to be great.
|
|
|
|
num_of_links = 0
|
|
for currentfile in enumerate(files):
|
|
|
|
tags_of_currentfile = tags_of_files[currentfile[0]]
|
|
filename, dirname, basename = split_up_filename(currentfile[1])
|
|
|
|
|
|
logging.debug('generate_tagtrees: handling file "' + filename + '" …')
|
|
|
|
if len(tags_of_currentfile) == 0:
|
|
# current file has no tags. It gets linked to the
|
|
# nontagged_item_dest_dir folder (if set). This is somewhat handy to find files
|
|
# which are - you guessed right - not tagged yet ;-)
|
|
|
|
if ignore_nontagged:
|
|
logging.debug('generate_tagtrees: file "' + filename + '" has no tags and will be ignores because of command line switch.')
|
|
else:
|
|
logging.debug('generate_tagtrees: file "' + filename + '" has no tags. Linking to "' +
|
|
nontagged_item_dest_dir + '"')
|
|
if not options.dryrun:
|
|
try:
|
|
os.symlink(filename, os.path.join(nontagged_item_dest_dir, basename))
|
|
except FileExistsError:
|
|
logging.warning('Untagged file \"' + filename + '\" is already linked: \"' +
|
|
os.path.join(nontagged_item_dest_dir, basename) + '\". You must have used the recursive ' +
|
|
'option and the sub-tree you\'re generating a tagtree from has two times the ' +
|
|
'same filename. I stick with the first one.')
|
|
num_of_links += 1
|
|
|
|
else:
|
|
|
|
# Here we go: current file has at least one tag. Create
|
|
# its tagtree directories and link the file:
|
|
|
|
# logging.debug('generate_tagtrees: permutations for file: "' + filename + '"')
|
|
for currentdepth in range(1, maxdepth+1):
|
|
# logging.debug('generate_tagtrees: currentdepth: ' + str(currentdepth))
|
|
for tagpermutation in itertools.permutations(tags_of_currentfile, currentdepth):
|
|
|
|
# WHAT I THOUGHT:
|
|
# Creating the directories does not require to iterate
|
|
# over the different level of depht because
|
|
# "os.makedirs()" is able to create all parent folders
|
|
# that are necessary. This spares us a loop.
|
|
# WHAT I LEARNED:
|
|
# We *have* to iterate over the depht as well
|
|
# because when a file has only one tag and the
|
|
# maxdepth is more than one, we are forgetting
|
|
# to create all those tagtree directories for this
|
|
# single tag. Therefore: we need to depth-loop for
|
|
# creating the directories as well. Bummer.
|
|
create_tagtrees_dir(directory, tagpermutation)
|
|
|
|
current_directory = os.path.join(directory, *[x for x in tagpermutation]) ## flatten out list of permutations to elements
|
|
# logging.debug('generate_tagtrees: linking file in ' + current_directory)
|
|
if not options.dryrun:
|
|
try:
|
|
os.symlink(filename, os.path.join(current_directory, basename))
|
|
except FileExistsError:
|
|
logging.warning('Tagged file \"' + filename + '\" is already linked: \"' +
|
|
os.path.join(current_directory, basename) + '\". You must have used the recursive ' +
|
|
'option and the sub-tree you\'re generating a tagtree from has two times the same ' +
|
|
'filename. I stick with the first one.')
|
|
num_of_links += 1
|
|
|
|
# Brag about how brave I was. And: it also shows the user why the
|
|
# runtime was that long. The number of links grows exponentially
|
|
# with the number of tags. Keep this in mind when tempering with
|
|
# the maxdepth!
|
|
logging.info('Number of symbolic links created in "' + directory + '" for the ' + str(len(files)) + ' files: ' +
|
|
str(num_of_links) + ' (tagtrees depth is ' + str(maxdepth) + ')')
|
|
|
|
|
|
def start_filebrowser(directory):
    """
    This function starts up the default file browser or the one given in the overriding command line parameter.

    A default file browser (DEFAULT_IMAGE_VIEWER_LINUX) is only defined for
    Linux. On any other platform, the user is just told which directory to
    visit. With options.dryrun set, the file browser is not started but only
    reported.

    If the file browser can not be started (e.g., because it is not
    installed), an error is logged together with the directory to visit
    instead of dying with a traceback.

    @param directory: the directory to use as starting directory
    """

    save_import('subprocess')
    save_import('platform')
    current_platform = platform.system()
    logging.debug('platform.system() is: [' + current_platform + ']')

    if current_platform != 'Linux':
        logging.info('No (default) file browser defined for platform \"' + current_platform + '\".')
        logging.info('Please visit ' + directory + ' to view filtered items.')
        return

    # the command line parameter overrides the built-in default if given:
    chosen_filebrowser = options.filebrowser if options.filebrowser else DEFAULT_IMAGE_VIEWER_LINUX

    if options.dryrun:
        logging.info('DRYRUN: I would now open the file browser "' + chosen_filebrowser + '"')
        return

    try:
        subprocess.call([chosen_filebrowser, directory])
    except OSError as error:
        # subprocess raises OSError (e.g., FileNotFoundError, PermissionError)
        # when the executable is missing or not executable. The items are
        # already in place at this point, so tell the user where to find them.
        logging.error('Could not start the file browser "' + chosen_filebrowser + '": ' + str(error))
        logging.info('Please visit ' + directory + ' to view filtered items.')
|
|
|
|
|
|
def successful_exit():
    """
    Logs the successful end of the run, flushes stdout and terminates with exit status 0.
    """

    logging.debug("successfully finished.")
    sys.stdout.flush()
    raise SystemExit(0)
|
|
|
|
|
|
def main():
    """
    Main function: checks the command line options and dispatches to the requested mode.

    Depending on the options, this lists tags, starts the tag gardening,
    generates tagtrees, or obtains tags (interactively via ask_for_tags() or
    from options.tags) and hands each given file over to
    handle_file_and_symlink_source_if_found(). Contradictory combinations of
    options are reported via error_exit().

    Almost all paths end with successful_exit() or sys.exit() instead of
    returning to the caller. The only explicit return value is an empty dict,
    which is returned when a list option was used but no tags were found.
    """

    # print the version and quit before anything else is set up:
    if options.version:
        print(os.path.basename(sys.argv[0]) + " version " + PROG_VERSION_DATE)
        sys.exit(0)

    handle_logging()

    if options.verbose and options.quiet:
        error_exit(1, "Options \"--verbose\" and \"--quiet\" found. " +
                   "This does not make any sense, you silly fool :-)")

    # interactive mode and tags are given
    if options.interactive and options.tags:
        error_exit(3, "I found option \"--tag\" and option \"--interactive\". \n" +
                   "Please choose either tag option OR interactive mode.")

    if options.list_tags_by_number and options.list_tags_by_alphabet:
        error_exit(6, "Please use only one list-by-option at once.")

    # The following checks make sure that only one of the options
    # tag_gardening, tagfilter, list_tags_by_number, list_tags_by_alphabet,
    # tags, and tagtrees is used at once:
    if options.tag_gardening and (options.list_tags_by_number or options.list_tags_by_alphabet or
                                  options.tags or options.tagtrees or options.tagfilter):
        error_exit(7, "Please don't use that gardening option together with any other option.")

    if options.tagfilter and (options.list_tags_by_number or options.list_tags_by_alphabet or
                              options.tags or options.tagtrees or options.tag_gardening):
        error_exit(7, "Please don't use that filter option together with any other option.")

    if options.list_tags_by_number and (options.tagfilter or options.list_tags_by_alphabet or
                                        options.tags or options.tagtrees or options.tag_gardening):
        error_exit(7, "Please don't use that list option together with any other option.")

    if options.list_tags_by_alphabet and (options.tagfilter or options.list_tags_by_number or
                                          options.tags or options.tagtrees or options.tag_gardening):
        error_exit(7, "Please don't use that list option together with any other option.")

    if options.tags and (options.tagfilter or options.list_tags_by_number or
                         options.list_tags_by_alphabet or options.tagtrees or options.tag_gardening):
        error_exit(7, "Please don't use that tags option together with any other option.")

    if options.tagtrees and (options.tagfilter or options.list_tags_by_number or
                             options.list_tags_by_alphabet or options.tags or options.tag_gardening):
        error_exit(7, "Please don't use the tagtrees option together with any other option.")

    # listing tags can not be combined with adding or removing tags:
    if (options.list_tags_by_alphabet or options.list_tags_by_number) and (options.tags or options.interactive or options.remove):
        error_exit(8, "Please don't use list any option together with add/remove tag options.")

    logging.debug("extracting list of files ...")
    logging.debug("len(options.files) [%s]" % str(len(options.files)))

    files = extract_filenames_from_argument(options.files)

    # declared global because this function re-binds it to an empty list
    # after each handled file (see the loop over the files below):
    global list_of_symlink_directories

    logging.debug("%s filenames found: [%s]" % (str(len(files)), '], ['.join(files)))
    logging.debug('reported console width: ' + str(TTY_WIDTH) + ' and height: ' + str(TTY_HEIGHT) + ' (80/80 is the fall-back)')
    tags_from_userinput = []
    # default vocabulary; some of the modes below replace or extend it:
    vocabulary = sorted(locate_and_parse_controlled_vocabulary(False))

    # only the modes listed here may be used without any file name as argument:
    if len(options.files) < 1 and not (options.tagtrees or options.tagfilter or options.list_tags_by_alphabet or
                                       options.list_tags_by_number or options.list_unknown_tags or options.tag_gardening):
        error_exit(5, "Please add at least one file name as argument")

    if options.list_tags_by_alphabet or options.list_tags_by_number or options.list_unknown_tags:

        # all list options work on the tags found starting from the current working directory:
        tag_dict = get_tags_from_files_and_subfolders(startdir=os.getcwd())
        if not tag_dict:
            print("\nNo file containing tags found in this folder hierarchy.\n")
            return {}

        if options.list_tags_by_alphabet:
            logging.debug("handling option list_tags_by_alphabet")
            print_tag_dict(tag_dict, vocabulary=vocabulary, sort_index=0, print_similar_vocabulary_tags=True)
            successful_exit()

        elif options.list_tags_by_number:
            logging.debug("handling option list_tags_by_number")
            print_tag_dict(tag_dict, vocabulary=vocabulary, sort_index=1, print_similar_vocabulary_tags=True)
            successful_exit()

        elif options.list_unknown_tags:
            logging.debug("handling option list_unknown_tags")
            list_unknown_tags(tag_dict)
            successful_exit()

    elif options.tag_gardening:
        logging.debug("handling option for tag gardening")
        handle_tag_gardening(vocabulary)
        successful_exit()

    elif options.tagtrees:
        logging.debug("handling option for tagtrees")
        # Defaults for the handling of files without any tag. Both values
        # are handed over to generate_tagtrees() below:
        ignore_nontagged = False
        nontagged_subdir = False
        if options.tagtrees_handle_no_tag:
            if options.tagtrees_handle_no_tag[0] == 'treeroot':
                logging.debug("options.tagtrees_handle_no_tag found: treeroot (default)")
                pass  # keep defaults
            elif options.tagtrees_handle_no_tag[0] == 'ignore':
                logging.debug("options.tagtrees_handle_no_tag found: ignore")
                ignore_nontagged = True
            else:
                # any other value is taken as the name of the folder for non-tagged items:
                ignore_nontagged = False
                nontagged_subdir = options.tagtrees_handle_no_tag[0]
                logging.debug("options.tagtrees_handle_no_tag found: use foldername [" + repr(options.tagtrees_handle_no_tag) + "]")

        start = time.time()
        generate_tagtrees(TAGFILTER_DIRECTORY, DEFAULT_TAGTREES_MAXDEPTH, ignore_nontagged, nontagged_subdir)
        delta = time.time() - start  # it's a float
        # report the duration only if the generation took noticeably long:
        if delta > 3:
            logging.info("Generated tagtrees in %.2f seconds" % delta)
        start_filebrowser(TAGFILTER_DIRECTORY)
        successful_exit()

    # interactive mode is also used when no tags were given via command line:
    elif options.interactive or not options.tags:

        tags_for_visual = None

        if len(options.files) < 1 and not options.tagfilter:
            error_exit(5, "Please add at least one file name as argument")

        tags_for_vocabulary = {}
        upto9_tags_for_shortcuts = []

        # look out for .filetags file and add readline support for tag completion if found with content
        if options.remove:
            # vocabulary for completing tags is current tags of files
            for currentfile in files:
                # add tags so that list contains all unique tags:
                for newtag in extract_tags_from_filename(currentfile):
                    add_tag_to_countdict(newtag, tags_for_vocabulary)
            vocabulary = sorted(tags_for_vocabulary.keys())
            upto9_tags_for_shortcuts = sorted(get_upto_nine_keys_of_dict_with_highest_value(tags_for_vocabulary))

        elif options.tagfilter:
            # vocabulary for completing tags consists of the tags found
            # starting from the current working directory:
            for tag in get_tags_from_files_and_subfolders(startdir=os.getcwd()):
                add_tag_to_countdict(tag, tags_for_vocabulary)

            logging.debug('generating vocabulary ...')
            vocabulary = sorted(tags_for_vocabulary.keys())
            upto9_tags_for_shortcuts = sorted(get_upto_nine_keys_of_dict_with_highest_value(tags_for_vocabulary))

        else:
            if files:

                # remove given (shared) tags from the vocabulary:
                tags_intersection_of_files = get_common_tags_from_files(files)
                tags_for_visual = tags_intersection_of_files
                logging.debug("found common tags: tags_intersection_of_files[%s]" % '], ['.join(tags_intersection_of_files))

                # append current filetags with a prepended '-' in order to allow tag completion for removing tags via '-tagname'
                tags_from_filenames = set()
                for currentfile in files:
                    tags_from_filenames = tags_from_filenames.union(set(extract_tags_from_filename(currentfile)))
                negative_tags_from_filenames = set()
                for currenttag in list(tags_from_filenames):
                    negative_tags_from_filenames.add('-' + currenttag)

                vocabulary = list(set(vocabulary).union(negative_tags_from_filenames) - set(tags_intersection_of_files))

                # the shortcut tags are derived from the tags found starting
                # from the directory of the first given file:
                logging.debug('deriving upto9_tags_for_shortcuts ...')
                upto9_tags_for_shortcuts = sorted(
                    get_upto_nine_keys_of_dict_with_highest_value(
                        get_tags_from_files_and_subfolders(
                            startdir=os.path.dirname(
                                os.path.abspath(files[0]))),
                        tags_intersection_of_files))
                logging.debug('derived upto9_tags_for_shortcuts')
            logging.debug('derived vocabulary with %i entries' % len(vocabulary))  # using default vocabulary which was generated above

        # ==================== Interactive asking user for tags ============================= ##
        tags_from_userinput = ask_for_tags(vocabulary, upto9_tags_for_shortcuts, tags_for_visual)
        # ==================== Interactive asking user for tags ============================= ##
        print('')  # new line after input for separating input from output

    else:
        # non-interactive: extract list of tags
        logging.debug("non-interactive mode: extracting tags from argument ...")

        tags_from_userinput = extract_tags_from_argument(options.tags[0])

    if not tags_from_userinput:
        # FIXXME: check: can this be the case?
        logging.info("no tags given, exiting.")
        sys.stdout.flush()
        sys.exit(0)

    logging.debug("tags found: [%s]" % '], ['.join(tags_from_userinput))
    if options.remove:
        logging.info("removing tags \"%s\" ..." % str(BETWEEN_TAG_SEPARATOR.join(tags_from_userinput)))
    elif options.tagfilter:
        logging.info("filtering items with tag(s) \"%s\" and linking to directory \"%s\" ..." %
                     (str(BETWEEN_TAG_SEPARATOR.join(tags_from_userinput)), str(TAGFILTER_DIRECTORY)))
    elif options.interactive:
        logging.info("processing tags \"%s\" ..." % str(BETWEEN_TAG_SEPARATOR.join(tags_from_userinput)))

    # filter option without file arguments: take the files of the current
    # working directory as returned by filter_files_matching_tags()
    # NOTE(review): presumably only the files having all given tags; verify in the helper
    if options.tagfilter and not files:
        assert_empty_tagfilter_directory(TAGFILTER_DIRECTORY)
        files = filter_files_matching_tags(get_files_of_directory(os.getcwd()), tags_from_userinput)

    logging.debug("iterate over files ...")

    # raise the global max_file_length to the length of the longest given file name:
    global max_file_length
    for filename in files:
        if len(filename) > max_file_length:
            max_file_length = len(filename)
    logging.debug('determined maximum file name length with %i' % max_file_length)

    for filename in files:

        if is_broken_link(filename):

            # skip broken links completely and write error message:
            logging.error('File "' + filename + '" is a broken symbolic link. Skipping this one …')

        else:

            # if filename is a symbolic link, tag the source file as well:
            handle_file_and_symlink_source_if_found(filename, tags_from_userinput, options.remove, options.tagfilter, options.dryrun)
            logging.debug('list_of_symlink_directories: ' + repr(list_of_symlink_directories))

            if len(list_of_symlink_directories) > 1:
                logging.debug('Seems like we\'ve found symlinks and renamed their source as well. Print out the those directories as well:')
                print(' This symbolic link has a link source with a matching basename. I renamed it there as well:')
                # NOTE(review): the last entry is not printed - presumably it is
                # the directory of the given file itself; confirm in the helper
                for directory in list_of_symlink_directories[:-1]:
                    print(' · ' + directory)
            # start with an empty list for the next file:
            list_of_symlink_directories = []

    if options.tagfilter:
        start_filebrowser(TAGFILTER_DIRECTORY)

    successful_exit()
|
|
|
|
|
|
# run main() only when this file is executed as a script (not when it is imported):
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:

        # aborted by the user (e.g., via Ctrl-C): log it instead of showing a traceback
        logging.info("Received KeyboardInterrupt")
|
|
|
|
# END OF FILE #################################################################
|
|
|
|
# end
|