filetags/filetags.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
## Time stamp of the last modification; PROG_VERSION_DATE slices the
## date part (characters 13..22) out of this string by fixed offsets,
## so keep the "Time-stamp: <YYYY-MM-DD ..." layout intact.
PROG_VERSION = u"Time-stamp: <2016-08-21 22:03:15 vk>"

## TODO:
## - fix parts marked with «FIXXME»
## - move from optparse to argparse
## - tagfilter: --copy :: copy files instead of creating symlinks
## - tagfilter: all toggle-cmd line args as special tags: --copy and so forth
##   - e.g., when user enters tag "--copy" when interactively reading tags, handle it like options.copy
##   - overwriting cmd-line arguments (if contradictory)
##   - allow combination of cmd-line tags and interactive tags
##     - they get combined
## - $HOME/.config/ with default options (e.g., geeqie)
## - tagfilter: additional parameter to move matching files to a temporary subfolder
##   - renaming/deleting of symlinks does not modify original files
## - tagfilter: --recursive :: recursively going into subdirectories and
##      collecting items (into one target directory)
## - tagfilter: --notag :: do not ask for tags, use all items that got no tag
##      at all
## - tagfilter: --ignoredirs :: do not symlink/copy directories
## - tagfilter: --emptytmpdir :: empty temporary directory after the image viewer exits
## - use "open" to open first(?) file


## ===================================================================== ##
##  You might not want to modify anything below this line if you do not  ##
##  know what you are doing :-)                                          ##
## ===================================================================== ##

## NOTE: in case of issues, check iCalendar files using: http://icalvalid.cloudapp.net/

import re
import sys
import os
import os.path   # for directory traversal to look for .tagfiles
import time
import logging
import operator  # for sorting dicts
import difflib   # for good enough matching words
from sets import Set  # to find out union/intersection of tag sets
import readline  # for raw_input() reading from stdin
import codecs    # for handling Unicode content in .tagfiles
from optparse import OptionParser

## date part ("YYYY-MM-DD") of PROG_VERSION, cut out by fixed offsets
PROG_VERSION_DATE = PROG_VERSION[13:23]
## local time at start-up, formatted like 2016-08-21T22:03:15
INVOCATION_TIME = time.strftime("%Y-%m-%dT%H:%M:%S", time.localtime())
## separates the descriptive part of a file name from its list of tags
FILENAME_TAG_SEPARATOR = u' -- '
## separates the individual tags within the list of tags
BETWEEN_TAG_SEPARATOR = u' '
## name of the file holding the controlled vocabulary (one entry per line)
CONTROLLED_VOCABULARY_FILENAME = ".filetags"
## suffix appended to tags in listings when they are part of the vocabulary
HINT_FOR_BEING_IN_VOCABULARY_TEMPLATE = ' *'
## directory in the user's home folder that receives the symlinks of the tag filter
TAGFILTER_DIRECTORY = os.path.join(os.path.expanduser("~"), ".filetags_tagfilter")
DEFAULT_IMAGE_VIEWER_LINUX = 'geeqie'

unique_tags = [[u'teststring1', u'teststring2']] ## list of lists; the tags within each inner list are mutually exclusive
## Note: u'teststring1' and u'teststring2' are hard-coded for testing purposes.
##       You might delete them if you don't use my unit test suite.


## Usage/help text handed to OptionParser. It is assembled once at start-up
## from sys.argv[0], the two separator constants and PROG_VERSION_DATE.
USAGE = u"\n\
    " + sys.argv[0] + u" [<options>] <list of files>\n\
\n\
This tool adds or removes simple tags to/from file names.\n\
\n\
Tags within file names are placed between the actual file name and\n\
the file extension, separated with \"" + FILENAME_TAG_SEPARATOR + "\". Multiple tags are\n\
separated with \"" + BETWEEN_TAG_SEPARATOR + "\":\n\
  Update for the Boss" + FILENAME_TAG_SEPARATOR + "projectA" + BETWEEN_TAG_SEPARATOR + "presentation.pptx\n\
  2013-05-16T15.31.42 Error message" + FILENAME_TAG_SEPARATOR + "screenshot" + BETWEEN_TAG_SEPARATOR + "projectB.png\n\
\n\
This easy to use tag system has a drawback: for tagging a larger\n\
set of files with the same tag, you have to rename each file\n\
separately. With this tool, this only requires one step.\n\
\n\
Example usages:\n\
  " + sys.argv[0] + u" --tags=\"presentation projectA\" *.pptx\n\
      ... adds the tags \"presentation\" and \"projectA\" to all PPTX-files\n\
  " + sys.argv[0] + u" -i *\n\
      ... ask for tag(s) and add them to all files in current folder\n\
  " + sys.argv[0] + u" -r draft *report*\n\
      ... removes the tag \"draft\" from all files containing the word \"report\"\n\
\n\
\n\
This tools is looking for (the first) text file named \".filetags\" in\n\
current and parent directories. Each line of it is interpreted as a tag\n\
for tag completion.\n\
\n\
Verbose description: http://Karl-Voit.at/managing-digital-photographs/\n\
\n\
:copyright: (c) by Karl Voit <tools@Karl-Voit.at>\n\
:license: GPL v3 or any later version\n\
:URL: https://github.com/novoid/filetag\n\
:bugreports: via github or <tools@Karl-Voit.at>\n\
:version: " + PROG_VERSION_DATE + "\n"


## File names containing tags match the following regular expression:
##   group 1 = file name without tags, group 2 = list of tags,
##   group 4 = file extension without the dot (optional).
## The pattern pieces are raw strings so that "\." and "\w" reach the regex
## engine as written instead of relying on Python passing unknown string
## escapes through unchanged.
FILE_WITH_TAGS_REGEX = re.compile(r"(.+?)" + FILENAME_TAG_SEPARATOR + r"(.+?)(\.(\w+))??$")
FILE_WITH_TAGS_REGEX_FILENAME_INDEX = 1  # component.group(1)
FILE_WITH_TAGS_REGEX_TAGLIST_INDEX = 2
FILE_WITH_TAGS_REGEX_EXTENSION_INDEX = 4

## Splits a file name at its last dot: group 1 = name, group 2 = extension.
FILE_WITH_EXTENSION_REGEX = re.compile(r"(.*)\.(.*)$")
FILE_WITH_EXTENSION_REGEX_FILENAME_INDEX = 1
FILE_WITH_EXTENSION_REGEX_EXTENSION_INDEX = 2


## Command line interface. The order of the add_option() calls below is
## the order in which the options show up in the generated help output.
parser = OptionParser(usage=USAGE)

parser.add_option("-t", "--tag", "--tags", dest="tags",
                  help="one or more tags (in quotes, separated by spaces) to add/remove")

## no explicit dest given here: optparse derives it from the first long
## option string, so the flag ends up in options.remove
parser.add_option("-r", "--remove", "-d", "--delete", action="store_true",
                  help="remove tags from (instead of adding to) file name(s)")

parser.add_option("-f", "--filter", dest="tagfilter", action="store_true",
                  help="filter according to tags")

parser.add_option("--imageviewer", dest="imageviewer",
                  help="command to view images (for --filter; default: geeqie)")

parser.add_option("-i", "--interactive", action="store_true", dest="interactive",
                  help="interactive mode: ask for (a)dding or (r)emoving and name of tag(s)")

parser.add_option("-s", "--dryrun", dest="dryrun", action="store_true",
                  help="enable dryrun mode: just simulate what would happen, do not modify files")

parser.add_option("--ln", "--list-tags-by-number", dest="list_tags_by_number", action="store_true",
                  help="list all file-tags sorted by their number of use")

parser.add_option("--la", "--list-tags-by-alphabet", dest="list_tags_by_alphabet", action="store_true",
                  help="list all file-tags sorted by their name")

parser.add_option("--lu", "--list-tags-unknown-to-vocabulary", dest="list_unknown_tags", action="store_true",
                  help="list all file-tags which are found in file names but are not part of .filetags")

parser.add_option("--tag-gardening", dest="tag_gardening", action="store_true",
                  help="This is for getting an overview on tags that might require to be renamed (typos, " +
                  "singular/plural, ...). See also http://www.webology.org/2008/v5n3/a58.html")

parser.add_option("-v", "--verbose", dest="verbose", action="store_true",
                  help="enable verbose mode")

parser.add_option("-q", "--quiet", dest="quiet", action="store_true",
                  help="enable quiet mode")

parser.add_option("--version", dest="version", action="store_true",
                  help="display version and exit")

## NOTE: the command line is parsed at module level, i.e., as a side effect
## of executing/importing this file; "options" and "args" are module globals
## that the functions below read directly.
(options, args) = parser.parse_args()


def handle_logging():
    """
    Configures the root logger according to the command line options:
    --verbose selects DEBUG level including time stamps, --quiet selects
    ERROR level, and INFO level is used otherwise. If both options are
    given, --verbose wins.
    """

    if options.verbose:
        level, layout = logging.DEBUG, "%(levelname)-8s %(asctime)-15s %(message)s"
    elif options.quiet:
        level, layout = logging.ERROR, "%(levelname)-8s %(message)s"
    else:
        level, layout = logging.INFO, "%(levelname)-8s %(message)s"

    logging.basicConfig(level=level, format=layout)


def error_exit(errorcode, text):
    """
    Logs text as an error message and terminates the program.

    @param errorcode: exit status handed to the calling process
    @param text: error message to log
    """

    ## flush pending regular output first so that it is not mixed up with the error message
    sys.stdout.flush()
    logging.error(text)

    ## same effect as sys.exit(errorcode), which raises exactly this exception
    raise SystemExit(errorcode)


class SimpleCompleter(object):
    """
    Prefix completion over a fixed list of words, following the
    complete(text, state) calling convention (presumably registered as a
    readline completer elsewhere in this file).
    """
    ## happily stolen from http://pymotw.com/2/readline/

    def __init__(self, options):
        """
        @param options: iterable of candidate strings; kept in sorted order
        """
        self.options = sorted(options)

    def complete(self, text, state):
        """
        Returns the state'th candidate that starts with text.

        The candidate list is (re)built whenever state is 0 and is reused
        for all following calls with a higher state.

        @param text: the prefix typed so far; empty text matches every option
        @param state: index of the requested candidate
        @param return: the candidate string or None if there is no such candidate
        """
        if state == 0:
            ## first request for this text -> collect the candidates
            if not text:
                self.matches = list(self.options)
                logging.debug('(empty input) matches: %s', self.matches)
            else:
                self.matches = [candidate
                                for candidate in self.options
                                if candidate and candidate.startswith(text)]
                logging.debug('%s matches: %s', repr(text), self.matches)

        ## hand out the requested candidate if there are that many
        try:
            candidate_for_state = self.matches[state]
        except IndexError:
            candidate_for_state = None

        logging.debug('complete(%s, %s) => %s',
                      repr(text), state, repr(candidate_for_state))
        return candidate_for_state


def contains_tag(filename, tagname=False):
    """
    Checks whether the file name carries a given tag. Without a tagname,
    checks whether the file name carries any tag at all.

    Only the last path component of filename is examined.

    @param filename: an unicode string containing a file name
    @param tagname: (optional) an unicode string containing a tag name
    @param return: True|False
    """

    assert filename.__class__ == str or filename.__class__ == unicode
    if tagname:
        assert tagname.__class__ == str or tagname.__class__ == unicode

    match = FILE_WITH_TAGS_REGEX.match(os.path.basename(filename))

    if not tagname:
        ## no specific tag requested: any tag counts
        return match is not None

    if match is None:
        logging.debug("file [%s] does not match FILE_WITH_TAGS_REGEX" % filename)
        return False

    tags_of_file = match.group(FILE_WITH_TAGS_REGEX_TAGLIST_INDEX).split(BETWEEN_TAG_SEPARATOR)
    return tagname in tags_of_file


def extract_tags_from_filename(filename):
    """
    Returns list of tags contained within filename. If no tag is
    found, an empty list is returned.

    Only the last path component is examined (same as contains_tag()), so
    a FILENAME_TAG_SEPARATOR within a directory name of a given path is
    not mistaken for the start of the tag list of the file.

    @param filename: an unicode string containing a file name
    @param return: list of tags (empty if there are none)
    """

    assert filename.__class__ == str or \
        filename.__class__ == unicode

    components = re.match(FILE_WITH_TAGS_REGEX, os.path.basename(filename))

    if not components:
        return []

    return components.group(FILE_WITH_TAGS_REGEX_TAGLIST_INDEX).split(BETWEEN_TAG_SEPARATOR)


def adding_tag_to_filename(filename, tagname):
    """
    Returns string of file name with tagname as additional tag.

    The first tag is separated from the file name by
    FILENAME_TAG_SEPARATOR, any further tag is appended to the existing
    tags using BETWEEN_TAG_SEPARATOR. The new tag is placed in front of
    the file extension (the part after the last dot), if there is one.
    A directory part of filename is kept as is.

    @param filename: an unicode string containing a file name
    @param tagname: an unicode string containing a tag name
    @param return: an unicode string of filename containing tagname
    """

    assert filename.__class__ == str or \
        filename.__class__ == unicode
    assert tagname.__class__ == str or \
        tagname.__class__ == unicode

    if contains_tag(filename) is False:
        logging.debug(u"adding_tag_to_filename(%s, %s): no tag found so far" % (filename, tagname))
        separator = FILENAME_TAG_SEPARATOR

    elif contains_tag(filename, tagname):
        logging.debug("adding_tag_to_filename(%s, %s): tag already found in filename" % (filename, tagname))
        return filename

    else:
        logging.debug("adding_tag_to_filename(%s, %s): add as additional tag to existing list of tags" %
                      (filename, tagname))
        separator = BETWEEN_TAG_SEPARATOR

    ## Work on the last path component only and re-attach the directory at
    ## the end. (Joining the directory with the complete filename would
    ## duplicate the directory part in the result.)
    directory = os.path.dirname(filename)
    basename = os.path.basename(filename)

    components = re.match(FILE_WITH_EXTENSION_REGEX, basename)
    if components:
        old_filename = components.group(FILE_WITH_EXTENSION_REGEX_FILENAME_INDEX)
        extension = components.group(FILE_WITH_EXTENSION_REGEX_EXTENSION_INDEX)
        new_basename = old_filename + separator + tagname + u'.' + extension
    else:
        ## no extension: simply append the tag
        new_basename = basename + separator + tagname

    return os.path.join(directory, new_basename)


def removing_tag_from_filename(filename, tagname):
    """
    Returns string of file name with tagname removed as tag.

    If tagname is not a tag of filename, filename is returned unchanged.
    If no tag is left afterwards, FILENAME_TAG_SEPARATOR is removed as
    well. Only the last path component is modified (same view on the
    name as contains_tag()); a directory part is kept as is.

    @param filename: an unicode string containing a file name
    @param tagname: an unicode string containing a tag name
    @param return: an unicode string of filename without tagname
    """

    assert filename.__class__ == str or \
        filename.__class__ == unicode
    assert tagname.__class__ == str or \
        tagname.__class__ == unicode

    if not contains_tag(filename, tagname):
        return filename

    directory, basename = os.path.split(filename)
    components = re.match(FILE_WITH_TAGS_REGEX, basename)

    if not components:
        logging.debug("file [%s] does not match FILE_WITH_TAGS_REGEX" % filename)
        return filename

    tags = components.group(FILE_WITH_TAGS_REGEX_TAGLIST_INDEX).split(BETWEEN_TAG_SEPARATOR)
    old_filename = components.group(FILE_WITH_TAGS_REGEX_FILENAME_INDEX)
    extension = components.group(FILE_WITH_TAGS_REGEX_EXTENSION_INDEX)
    if not extension:
        extension = u''
    else:
        extension = u'.' + extension

    ## Decide on the tags that are left over (instead of on the number of
    ## tags found) so that a tag which is contained more than once does
    ## not leave a dangling separator behind.
    remaining_tags = [tag for tag in tags if tag != tagname]

    if not remaining_tags:
        logging.debug("given tagname is the only tag -> remove all tags and FILENAME_TAG_SEPARATOR as well")
        new_basename = old_filename + extension
    else:
        ## still tags left
        new_basename = old_filename + FILENAME_TAG_SEPARATOR + \
            BETWEEN_TAG_SEPARATOR.join(remaining_tags) + extension

    return os.path.join(directory, new_basename)


def extract_tags_from_argument(argument):
    """
    Splits a string of tags at BETWEEN_TAG_SEPARATOR.

    @param argument: string containing one or more tags
    @param return: a list of unicode tags, or False if argument is empty
    """

    assert argument.__class__ == str or argument.__class__ == unicode

    if len(argument) == 0:
        return False

    separator = unicode(BETWEEN_TAG_SEPARATOR)
    return argument.split(separator)


def extract_filenames_from_argument(argument):
    """
    Placeholder for converting file name arguments. At the moment the
    argument is handed back unchanged.

    @param argument: string containing one or more file names
    @param return: the given argument, unchanged (no conversion is done yet)
    """

    ## FIXXME: works on my computer without any conversion; add a check later on
    return argument

def get_unique_tags_from_filename(filename):
    """
    Returns those tags of filename that are members of one of the groups
    of mutually exclusive tags in "unique_tags".

    A tag is listed once for each group it is a member of.

    @param filename: string containing one file name
    @param return: list of found tags
    """

    return [tag
            for tag in extract_tags_from_filename(filename)
            for taggroup in unique_tags
            if tag in taggroup]

def item_contained_in_list_of_lists(item, list_of_lists):
    """
    Looks for the first list within list_of_lists that contains item.

    @param item: item to look for in list_of_lists
    @param list_of_lists: list containing lists of items
    @param return: tuple (item, first list containing item) or (None, None) if no list contains item
    """

    first_hit = next((candidates for candidates in list_of_lists if item in candidates), None)

    if first_hit is None:
        return None, None
    return item, first_hit

def handle_file(filename, tags, do_remove, do_filter, dryrun):
    """
    Processes a single file: either creates a symlink to it within
    TAGFILTER_DIRECTORY (filter mode) or adds/removes the given tags by
    renaming the file.

    Directories and items that are not existing files are skipped with a
    log message.

    @param filename: string containing one file name
    @param tags: list containing one or more tags
    @param do_remove: boolean which defines if tags should be added (False) or removed (True)
    @param do_filter: boolean; if True, the file is linked into TAGFILTER_DIRECTORY and no tag is modified
    @param dryrun: boolean which defines if files should be changed (False) or not (True)
    @param return: the new filename when adding/removing tags (also in dryrun mode, where
                   nothing gets renamed); None if the item was skipped or in filter mode
    """

    assert filename.__class__ == str or \
        filename.__class__ == unicode
    assert tags.__class__ == list
    ## the flags are type-checked only if they are true; values such as None pass as "not set"
    if do_remove:
        assert do_remove.__class__ == bool
    if do_filter:
        assert do_filter.__class__ == bool
    if dryrun:
        assert dryrun.__class__ == bool

    if os.path.isdir(filename):
        logging.warning("Skipping directory \"%s\" because this tool only renames file names." % filename)
        return
    elif not os.path.isfile(filename):
        logging.debug("file type error in folder [%s]: file type: is file? %s  -  is dir? %s  -  is mount? %s" % (os.getcwdu(), str(os.path.isfile(filename)), str(os.path.isdir(filename)), str(os.path.islink(filename))))
        logging.error("Skipping \"%s\" because this tool only renames existing file names." % filename)
        return

    if do_filter:

        ## fall back to a plain ASCII arrow if the output encoding cannot represent the Unicode arrow
        try:
            print u" link   %s   →   %s" % (filename, TAGFILTER_DIRECTORY)
        except UnicodeEncodeError:
            print u" link   %s   >   %s" % (filename, TAGFILTER_DIRECTORY)
        if not dryrun:
            ## the link target is built from the current working directory and filename
            os.symlink(os.path.join(os.getcwdu(), filename),
                       os.path.join(TAGFILTER_DIRECTORY, filename))


    else: ## add or remove tags:
        new_filename = filename

        ## apply the tags one after another to the (intermediate) new file name
        for tagname in tags:
            if do_remove:
                new_filename = removing_tag_from_filename(new_filename, tagname)
            else:
                ## FIXXME: not performance optimized for large number of unique tags in many lists:
                tag_in_unique_tags, matching_unique_tag_list = item_contained_in_list_of_lists(tagname, unique_tags)

                if tagname != tag_in_unique_tags:
                    ## ordinary tag which is not part of any group of mutually exclusive tags
                    new_filename = adding_tag_to_filename(new_filename, tagname)
                else:
                    ## if tag within unique_tags found, and new unique tag is given, remove old tag:
                    ## e.g.: unique_tags = (u'yes', u'no') -> if 'no' should be added, remove existing tag 'yes' (and vice versa)
                    ## If user enters contradicting tags, only the last one will be applied.
                    ## FIXXME: this is an undocumented feature -> please add proper documentation

                    current_filename_tags = extract_tags_from_filename(new_filename)
                    conflicting_tags = list(set(current_filename_tags).intersection(matching_unique_tag_list))
                    logging.debug("found unique tag %s which require old unique tag(s) to be removed: %s" % (tagname, repr(conflicting_tags)))
                    for conflicting_tag in conflicting_tags:
                        new_filename = removing_tag_from_filename(new_filename, conflicting_tag)
                    new_filename = adding_tag_to_filename(new_filename, tagname)

        if dryrun:
            ## only report what would be done
            logging.info(u" ")
            logging.info(u" renaming \"%s\"" % filename)
            try:
                logging.info(u"      ⤷   \"%s\"" % (new_filename))
            except UnicodeEncodeError:
                logging.info(u"      >   \"%s\"" % (new_filename))
        else:
            ## rename only if the tags actually changed the file name
            if filename != new_filename:
                if not options.quiet:
                    try:
                        print u"   %s   →   %s" % (filename, new_filename)
                    except UnicodeEncodeError:
                        print u"   %s   >   %s" % (filename, new_filename)
                logging.debug(u" renaming \"%s\"" % filename)
                try:
                    logging.debug(u"      ⤷   \"%s\"" % (new_filename))
                except UnicodeEncodeError:
                    logging.debug(u"      >   \"%s\"" % (new_filename))
                os.rename(filename, new_filename)

        return new_filename


def add_tag_to_countdict(tag, tags):
    """
    Takes a tag (string) and a dict. Returns the dict with the count
    value of the tag increased by one.

    The given dict is modified in place and returned for convenience.

    @param tag: a (unicode) string
    @param tags: dict of tags
    @param return: dict of tags with incremented counter of tag (1 if the tag is new)
    """

    assert tag.__class__ == str or \
        tag.__class__ == unicode
    assert tags.__class__ == dict

    ## dict.get() is a single hash lookup; "tag in tags.keys()" would build
    ## and scan a list of all keys for every call
    tags[tag] = tags.get(tag, 0) + 1

    return tags


def get_tags_from_files_and_subfolders(startdir=None, starttags=False, recursive=False):
    """
    Collects the tags of all files and folders located directly within
    the given directory and returns a dict of all tags (including
    starttags) and their number of occurrence.

    @param startdir: directory to look into; if omitted (None), the current
                     working directory at the time of the call is used
    @param starttags: (optional) non-empty dict of tags and counts to start with;
                      it is updated in place and returned
    @param recursive: must be False; descending into subfolders is not implemented yet
    @param return: dict of tags and their number of occurrence
    """

    ## Determine the default at call time: a default value of os.getcwdu()
    ## within the signature would be evaluated only once, when the
    ## function is defined.
    if startdir is None:
        startdir = os.getcwdu()

    assert os.path.isdir(startdir)

    if not starttags:
        tags = {}
    else:
        assert starttags.__class__ == dict
        tags = starttags

    assert not recursive ## FIXXME: not implemented yet

    logging.debug('get_tags_from_files_and_subfolders called with startdir [%s], starttags [%s], recursive[%s]' % (startdir, str(starttags), str(recursive)))
    for root, dirs, files in os.walk(startdir):
        logging.debug('get_tags_from_files_and_subfolders: root [%s]' % root)
        ## names of files as well as of folders may carry tags
        for name in files + dirs:
            for tag in extract_tags_from_filename(name):
                tags = add_tag_to_countdict(tag, tags)
        break  # do not loop: only the top level of startdir is examined

    return tags


def find_similar_tags(tag, tags):
    """
    Returns those entries of tags that are similar to tag according to
    difflib.get_close_matches() with a cutoff of 0.7. Entries that are
    equal to tag are left out.

    @param tag: a (unicode) string that represents a tag
    @param tags: a list of (unicode) strings
    @param return: list of tags that are similar to tag
    """

    assert tag.__class__ == str or tag.__class__ == unicode
    assert tags.__class__ == list

    candidates = difflib.get_close_matches(tag, tags, n=999, cutoff=0.7)

    ## omit exact matches
    return [candidate for candidate in candidates if candidate != tag]


def list_tags_by_alphabet(only_with_similar_tags=False, vocabulary=False):
    """
    Traverses the file system, extracts all tags, prints them sorted by alphabet

    @param only_with_similar_tags: if true, print out only tags with similarity to others
    @param vocabulary: array of tags from controlled vocabulary or False
    @param return: dict of tags (if only_with_similar_tags, tags without similar ones are omitted)
    """

    tag_dict = get_tags_from_files_and_subfolders(startdir=os.getcwdu(), recursive=False)
    if not tag_dict:
        print "\nNo file containing tags found in this folder hierarchy.\n"
        return {}

    ## determine maximum length of strings for formatting:
    maxlength_tags = max(len(s) for s in tag_dict.keys()) + len(HINT_FOR_BEING_IN_VOCABULARY_TEMPLATE)
    maxlength_count = len(str(abs(max(tag_dict.values()))))
    if maxlength_count < 5:
        maxlength_count = 5

    hint_for_being_in_vocabulary = ''
    print("\n  {0:{1}s} : count".format(u'tag', maxlength_tags))
    print "  " + "-" * (maxlength_tags + maxlength_count + 3)

    ## sort dict of (tag, count) according to tag name
    for tuple in sorted(tag_dict.items(), key=operator.itemgetter(0)):

        close_matches = find_similar_tags(tuple[0], tag_dict.keys())
        see_also = u''

        ## if similar names found, format them accordingly for output:
        if len(close_matches) > 0:
            see_also = u'      (similar to:  ' + ', '.join(close_matches) + u')'

        if (only_with_similar_tags and len(close_matches) > 0) or not only_with_similar_tags:
            if vocabulary and tuple[0] in vocabulary:
                hint_for_being_in_vocabulary = HINT_FOR_BEING_IN_VOCABULARY_TEMPLATE
            else:
                hint_for_being_in_vocabulary = ''
            print "  {0:{1}s} : {2:{3}}{4}".format(tuple[0] + hint_for_being_in_vocabulary, maxlength_tags, tuple[1], maxlength_count, see_also)

        if only_with_similar_tags and len(close_matches) == 0:
            ## remove entries from dict for returning only tags with similar tag entries:
            del tag_dict[tuple[0]]

    print ''

    return tag_dict


def list_tags_by_number(max_tag_count=0, vocabulary=False):
    """
    Traverses the file system, extracts all tags, prints them sorted by tag usage count

    @param max_tag_count: print only tags which occur less or equal to this number (disabled if 0)
    @param vocabulary: array of tags from controlled vocabulary or False
    @param return: dict of tags (if max_tag_count is set, returned entries are set accordingly)
    """

    tag_dict = get_tags_from_files_and_subfolders(startdir=os.getcwdu(), recursive=False)
    if not tag_dict:
        print "\nNo file containing tags found in this folder hierarchy.\n"
        return {}

    print_tag_dict(tag_dict, max_tag_count, vocabulary)

    return tag_dict


def print_tag_dict(tag_dict, max_tag_count=0, vocabulary=False):
    """
    Takes a dictionary which holds tag names and their occurrence and prints it to stdout

    @param tag_dict: a dictionary holding tags and their occurrence number
    @param vocabulary: array of tags from controlled vocabulary or False
    @param max_tag_count: print only tags which occur less or equal to this number (disabled if 0)
    """

    ## determine maximum length of strings for formatting:
    maxlength_tags = max(len(s) for s in tag_dict.keys()) + len(HINT_FOR_BEING_IN_VOCABULARY_TEMPLATE)
    maxlength_count = len(str(abs(max(tag_dict.values()))))
    if maxlength_count < 5:
        maxlength_count = 5

    hint_for_being_in_vocabulary = ''
    if vocabulary:
        print u"\n  (Tags marked with an asterisk apprear in your vocabulary.)"
    print "\n {0:{1}} : {2:{3}}".format(u'count', maxlength_count, u'tag', maxlength_tags)
    print " " + '-' * (maxlength_tags + maxlength_count + 7)
    for tuple in sorted(tag_dict.items(), key=operator.itemgetter(1)):
        ## sort dict of (tag, count) according to count
        if (max_tag_count > 0 and tuple[1] <= max_tag_count) or max_tag_count == 0:
            if vocabulary and tuple[0] in vocabulary:
                hint_for_being_in_vocabulary = HINT_FOR_BEING_IN_VOCABULARY_TEMPLATE
            else:
                hint_for_being_in_vocabulary = ''
            print " {0:{1}} : {2:{3}}".format(tuple[1], maxlength_count, tuple[0] + hint_for_being_in_vocabulary, maxlength_tags)

        if max_tag_count > 0 and tuple[1] > max_tag_count:
            ## remove entries that exceed max_tag_count limit:
            del tag_dict[tuple[0]]
    print ''


def list_unknown_tags():
    """
    Traverses the file system, extracts all tags, prints tags that are found in file names which are not found in the controlled vocabulary file .filetags

    @param return: dict of tags (if max_tag_count is set, returned entries are set accordingly)
    """

    file_tag_dict = get_tags_from_files_and_subfolders(startdir=os.getcwdu(), recursive=False)
    if not file_tag_dict:
        print "\nNo file containing tags found in this folder hierarchy.\n"
        return {}

    vocabulary = locate_and_parse_controlled_vocabulary(False)

    ## filter out known tags from tag_dict
    tag_dict = {}
    for entry in file_tag_dict:
        if entry not in vocabulary:
            tag_dict[entry] = file_tag_dict[entry]

    if len(tag_dict) == 0:
        print "\n  " + str(len(file_tag_dict)) + " different tags were found in file names which are all" + \
        " part of your .filetags vocabulary (consisting of " + str(len(vocabulary)) + " tags).\n"
    else:
        print_tag_dict(tag_dict, vocabulary)

    return tag_dict


def handle_tag_gardening(vocabulary):
    """
    This method is quite handy to find tags that might contain typos or do not
    differ much from other tags. You might want to rename them accordinly.

    FIXXME: this is *not* performance optimized since it traverses the file
    system multiple times!

    @param vocabulary: array containing the controlled vocabulary (or False)
    @param return: -
    """

    tag_dict = get_tags_from_files_and_subfolders(startdir=os.getcwdu(), recursive=False)
    if not tag_dict:
        print "\nNo file containing tags found in this folder hierarchy.\n"
        return

    print "\nTags that appear only once are most probably typos or you have forgotten them:"
    tags_by_number = list_tags_by_number(max_tag_count=1, vocabulary=vocabulary)

    print "Tags which have similar other tags are probably typos or plural/singular forms of others:"
    tags_by_alphabet = list_tags_by_alphabet(only_with_similar_tags=True, vocabulary=vocabulary)

    set_by_number = Set(tags_by_number.keys())
    set_by_alphabet = Set(tags_by_alphabet.keys())
    tags_in_both_outputs = set_by_number & set_by_alphabet  # intersection of sets
    hint_for_being_in_vocabulary = ''

    if tags_in_both_outputs != Set([]):
        print "If tags appear in both lists from above, they most likely require your attention:"

        ## determine maximum length of strings for formatting:
        maxlength_tags = max(len(s) for s in tags_in_both_outputs) + len(HINT_FOR_BEING_IN_VOCABULARY_TEMPLATE)
        maxlength_count = len(str(abs(max(tag_dict.values()))))
        if maxlength_count < 5:
            maxlength_count = 5

        print("\n  {0:{1}s} : count".format(u'tag', maxlength_tags))
        print "  " + "-" * (maxlength_tags + maxlength_count + 3)
        for tag in sorted(tags_in_both_outputs):
            if vocabulary and tag in vocabulary:
                hint_for_being_in_vocabulary = HINT_FOR_BEING_IN_VOCABULARY_TEMPLATE
            else:
                hint_for_being_in_vocabulary = ''

            similar_tags = u'      (similar to:  ' + ', '.join(find_similar_tags(tag, tag_dict.keys())) + u')'
            print "  {0:{1}} : {2:{3}}  {4}".format(tag + hint_for_being_in_vocabulary, maxlength_tags, tags_by_number[tag], maxlength_count, similar_tags)
        print


def locate_file_in_cwd_and_parent_directories(startfile, filename):
    """This method looks for the filename in the folder of startfile and its
    parent folders. It returns the path of the first matching file found.

    The search walks up the directory tree by path manipulation only; the
    working directory of the process is not changed.

    @param startfile: file (or directory) whose path is the starting point; if False, the working path is taken
    @param filename: string of file name to look for
    @param return: path of the file found (starting directory joined with filename) or False if there is none
    """

    ## "startfile and ..." keeps a startfile of False away from os.path.*
    if startfile and os.path.isfile(startfile):
        starting_dir = os.path.dirname(os.path.abspath(startfile))
        logging.debug('startfile [%s] found, using it as starting_dir [%s] ....' % (startfile, starting_dir))
    elif startfile and os.path.isdir(startfile):
        starting_dir = os.path.abspath(startfile)
        logging.debug('startfile [%s] is a directory, using it as starting_dir [%s] .....' % (startfile, starting_dir))
    else:
        starting_dir = os.getcwdu()
        logging.debug('no startfile found; using cwd as starting_dir [%s] ......' % (starting_dir))

    current_dir = starting_dir
    while True:
        filename_to_look_for = os.path.join(current_dir, filename)
        logging.debug('looking for \"%s\" in directory \"%s\" .......' % (filename, current_dir))
        if os.path.isfile(filename_to_look_for):
            logging.debug('found \"%s\" in directory \"%s\" ........' % (filename, current_dir))
            return filename_to_look_for

        parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir))
        if parent_dir == current_dir:
            ## the parent of the top-most directory is the directory itself: nothing left to check
            break
        current_dir = parent_dir

    logging.debug('did NOT find \"%s\" in current directory or any parent directory' % filename)
    return False


def locate_and_parse_controlled_vocabulary(startfile):

    """Locates the file named CONTROLLED_VOCABULARY_FILENAME (looked up
    via locate_file_in_cwd_and_parent_directories(), starting at
    startfile) and parses it. Each non-empty line contains a tag which
    gets read in for tab completion.

    A line holding several tags separated by BETWEEN_TAG_SEPARATOR
    declares a group of mutually exclusive tags. Such a group is appended
    to the global list unique_tags and its members are added to the
    returned tag list as well.

    Empty lines and empty fragments between repeated separators are
    ignored so that no empty string ends up as a tag.

    @param startfile: file whose location is the starting point of the search
    @param return: list of found tag strings; empty list if no vocabulary file was found
    """

    global unique_tags

    filename = locate_file_in_cwd_and_parent_directories(startfile, CONTROLLED_VOCABULARY_FILENAME)

    if not filename:
        logging.debug('locate_and_parse_controlled_vocabulary: could not derive filename for controlled vocabulary in folder of startfile')
        return []

    if not os.path.isfile(filename):
        logging.debug('locate_and_parse_controlled_vocabulary: could not find controlled vocabulary in folder of startfile')
        return []

    logging.debug('locate_and_parse_controlled_vocabulary: found controlled vocabulary in folder of startfile')
    tags = []
    with codecs.open(filename, encoding='utf-8') as filehandle:
        logging.debug('locate_and_parse_controlled_vocabulary: reading controlled vocabulary in [%s]' % filename)
        for rawline in filehandle:
            line = rawline.strip()
            if not line:
                ## blank lines must not result in an empty tag
                continue
            if BETWEEN_TAG_SEPARATOR in line:
                ## if multiple tags are in one line, they are mutually exclusive: only one can be set via filetags
                logging.debug('locate_and_parse_controlled_vocabulary: found unique tags: %s' % (line))
                ## repeated separators would otherwise yield empty tags:
                group = [tag for tag in line.split(BETWEEN_TAG_SEPARATOR) if tag]
                if len(group) > 1:
                    unique_tags.append(group)
                ## *also* append unique tags to general tag list:
                tags.extend(group)
            else:
                tags.append(line)
    logging.debug('locate_and_parse_controlled_vocabulary: controlled vocabulary has %i tags' % len(tags))
    logging.debug('locate_and_parse_controlled_vocabulary: controlled vocabulary has %i groups of unique tags' % len(unique_tags))
    return tags


def print_tag_shortcut_with_numbers(tag_list, tags_get_added=True, tags_get_linked=False):
    """Prints a caption followed by the tags of tag_list to stdout. Each
    tag is followed by its number in parentheses, which is its position
    in the list counted from 1.

    The caption depends on the mode: adding tags (tags_get_added),
    linking tags (tags_get_linked), or, if neither is set, removing tags.
    With nine or more tags the "Top nine" variant of the caption is used.

    @param tag_list: list of string holding the tags
    @param tags_get_added: True if tags get added, False otherwise
    @param tags_get_linked: True if tags get linked; only looked at if tags_get_added is False
    @param return: -
    """

    ## (caption for fewer than nine tags, caption for nine or more tags)
    if tags_get_added:
        captions = (u"Previously used tags in this directory:",
                    u"Top nine previously used tags in this directory:")
    elif tags_get_linked:
        captions = (u"Used tags in this directory:",
                    u"Top nine used tags in this directory:")
    else:
        captions = (u"Possible tags to be removed:",
                    u"Top nine possible tags to be removed:")

    hint_string = captions[0] if len(tag_list) < 9 else captions[1]
    print("\n  " + hint_string)

    list_of_tag_hints = [tag + ' (' + str(number) + ')'
                         for number, tag in enumerate(tag_list, 1)]

    try:
        print(u'    ' + u' ⋅ '.join(list_of_tag_hints))
    except UnicodeEncodeError:
        ## output encoding is not able to show the dot operator: use a plain dash
        print(u'    ' + u' - '.join(list_of_tag_hints))
    print(u'')  ## newline at end


def check_for_possible_shortcuts_in_entered_tags(tags, list_of_shortcut_tags):
    """
    Returns tags unchanged if the only entered tag is not a shortcut
    (entered as integer). If it is an integer, each of its digits is
    taken as the 1-based position of a tag within list_of_shortcut_tags
    and the list of the corresponding tags is returned.

    tags is returned unchanged as well if it does not consist of exactly
    one tag or if any digit does not match a shortcut position (digit 0
    or a digit larger than the number of shortcut tags).

    @param tags: list of entered tags from the user, e.g., [u'13']
    @param list_of_shortcut_tags: list of possible shortcut tags, e.g., [u'bar', u'folder1', u'baz']
    @param return: list of tags which were meant by the user, e.g., [u'bar', u'baz']
    """

    assert tags.__class__ == list
    assert list_of_shortcut_tags.__class__ == list

    if len(tags) != 1:
        ## shortcuts are only defined for a single entered tag
        return tags

    potential_shortcut_string = tags[0]
    try:
        int(potential_shortcut_string)
    except ValueError:
        logging.debug('single entered tag is a normal tag')
        return tags

    logging.debug('single entered tag is an integer; stepping through the integers')
    meant_tags = []
    for character in potential_shortcut_string:
        logging.debug('adding tag number %s' % character)
        try:
            position = int(character)
        except ValueError:
            ## int() accepts signs and surrounding whitespace for the whole
            ## string; those characters are no shortcut numbers
            logging.debug('single entered tag is a normal tag')
            return tags
        ## position 0 must not be looked up: index -1 would silently
        ## select the last shortcut tag
        if position < 1 or position > len(list_of_shortcut_tags):
            return tags
        meant_tags.append(list_of_shortcut_tags[position - 1])

    return meant_tags


def get_upto_nine_keys_of_dict_with_highest_value(mydict):
    """
    Takes a dict, ranks its entries by value (highest first), and returns
    the keys of up to nine top-ranked entries. The returned keys are
    sorted in ascending order of the keys themselves, not by rank.

    Example1: { "key2":45, "key1": 33} -> [ "key1", "key2" ]

    @param mydict: dictionary holding keys and values
    @param return: sorted list of up to nine keys having the highest values
    """

    assert mydict.__class__ == dict

    ranked_items = sorted(mydict.items(), key=operator.itemgetter(1), reverse=True)
    top_keys = [key for key, _ in ranked_items[:9]]
    top_keys.sort()
    return top_keys


def ask_for_tags(vocabulary, upto9_tags_for_shortcuts):
    """
    Takes a vocabulary and optional up to nine tags for shortcuts and interactively asks
    the user to enter tags. Exits the program (exit code 0) if no tags were entered.
    Returns list of entered tags.

    If the vocabulary is not empty, it is registered for readline TAB completion.
    If exactly one tag was entered and shortcut tags are given, the entered tag is
    checked for being a number shortcut and replaced by the tags it stands for.

    Reads the module-level "options" and "args".

    @param vocabulary: array containing the controlled vocabulary
    @param upto9_tags_for_shortcuts: array of tags which can be used to generate number-shortcuts
    @param return: list of tags entered by the user (number shortcuts already resolved)
    """

    ## has to be defined even without vocabulary: it is part of the prompt below
    completionhint = u''

    if vocabulary:

        assert(vocabulary.__class__ == list)

        # Register our completer function
        readline.set_completer(SimpleCompleter(vocabulary).complete)

        # Use the tab key for completion
        readline.parse_and_bind('tab: complete')

        completionhint = u'; complete %s tags with TAB' % str(len(vocabulary))

    logging.debug("len(args) [%s]" % str(len(args)))
    logging.debug("args %s" % str(args))

    print("                 ")
    print("Please enter tags, separated by \"" + BETWEEN_TAG_SEPARATOR + "\"; abort with Ctrl-C" +
          completionhint)
    print("                     ")
    print("        ,---------.  ")
    print("        |  ?     o | ")
    print("        `---------'  ")
    print("                     ")

    if len(upto9_tags_for_shortcuts) > 0:
        print_tag_shortcut_with_numbers(upto9_tags_for_shortcuts,
                                        tags_get_added=(not options.remove and not options.tagfilter),
                                        tags_get_linked=options.tagfilter)

    logging.debug("interactive mode: asking for tags ...")
    entered_tags = raw_input('Tags: ').strip()
    tags_from_userinput = extract_tags_from_argument(entered_tags)

    if not tags_from_userinput:
        logging.info("no tags given, exiting.")
        sys.stdout.flush()
        sys.exit(0)

    if len(tags_from_userinput) == 1 and len(upto9_tags_for_shortcuts) > 0:
        ## check if user entered number shortcuts instead of a tag:
        tags_from_userinput = check_for_possible_shortcuts_in_entered_tags(tags_from_userinput, upto9_tags_for_shortcuts)
    return tags_from_userinput

def get_files_of_directory(directory):
    """
    Returns the names of the files located directly in the given
    directory. Files of subdirectories are not included. If os.walk()
    yields nothing for the directory, an empty list is returned.

    @param directory: string of an existing directory
    @param return: list of file names of given directory
    """

    ## only the first os.walk() result (the directory itself) is of interest
    top_level = next(os.walk(directory), None)
    if top_level is None:
        return []
    return list(top_level[2])

def filter_files_matching_tags(allfiles, tags):
    """
    Selects those file names whose tags (as returned by
    extract_tags_from_filename()) share at least one tag with the given
    tags. The order of allfiles is kept.

    @param allfiles: array of file names
    @param tags: array of tags
    @param return: list of file names that contain at least one tag of tags
    """

    matching_files = []
    for candidate in allfiles:
        wanted_tags = set(tags)
        tags_of_candidate = set(extract_tags_from_filename(candidate))
        if wanted_tags.intersection(tags_of_candidate):
            matching_files.append(candidate)
    return matching_files

def assert_empty_tagfilter_directory():
    """
    Makes sure that TAGFILTER_DIRECTORY exists and is empty: a missing
    directory gets created, an existing one gets deleted (including its
    content) and re-created.

    With options.dryrun set, only the debug messages are written and the
    file system is not touched.
    """

    directory_exists = os.path.isdir(TAGFILTER_DIRECTORY)

    if directory_exists:
        logging.debug('found old tagfilter directory "%s"; deleting directory ...' % str(TAGFILTER_DIRECTORY))
    else:
        logging.debug('creating non-existent tagfilter directory "%s" ...' % str(TAGFILTER_DIRECTORY))

    if options.dryrun:
        return

    if directory_exists:
        import shutil    # for removing directories with shutil.rmtree()
        shutil.rmtree(TAGFILTER_DIRECTORY)
        logging.debug('re-creating tagfilter directory "%s" ...' % str(TAGFILTER_DIRECTORY))

    ## make sure that temp dir is here:
    os.makedirs(TAGFILTER_DIRECTORY)
    assert(os.path.isdir(TAGFILTER_DIRECTORY))


def main():
    """Main function: checks the command line options for contradictions
    and dispatches to listing tags, tag gardening, or adding/removing/
    filtering tags of the given files.

    Reads the module-level "options" and "args". The tags are taken from
    options.tags or, in interactive mode, asked from the user via
    ask_for_tags(). Every file is then passed to handle_file(). With
    options.tagfilter on Linux, DEFAULT_IMAGE_VIEWER_LINUX is started on
    TAGFILTER_DIRECTORY afterwards.
    """

    if options.version:
        print(os.path.basename(sys.argv[0]) + " version " + PROG_VERSION_DATE)
        sys.exit(0)

    handle_logging()

    if options.verbose and options.quiet:
        error_exit(1, "Options \"--verbose\" and \"--quiet\" found. " +
                   "This does not make any sense, you silly fool :-)")

    ## interactive mode and tags are given
    if options.interactive and options.tags:
        error_exit(3, "I found option \"--tag\" and option \"--interactive\". \n" +
                   "Please choose either tag option OR interactive mode.")

    if options.list_tags_by_number and options.list_tags_by_alphabet:
        error_exit(6, "Please use only one list-by-option at once.")

    if options.tag_gardening and (options.list_tags_by_number or options.list_tags_by_alphabet or options.tags or options.remove):
        error_exit(7, "Please don't use that gardening option together with any other option.")

    if (options.list_tags_by_alphabet or options.list_tags_by_number) and (options.tags or options.interactive or options.remove):
        error_exit(8, "Please don't use list any option together with add/remove tag options.")

    logging.debug("extracting list of files ...")
    logging.debug("len(args) [%s]" % str(len(args)))

    files = extract_filenames_from_argument(args)

    logging.debug("%s filenames found: [%s]" % (str(len(files)), '], ['.join(files)))

    tags_from_userinput = []

    ## only tagfilter, the list options, and tag gardening work without file arguments
    if len(args) < 1 and not (options.tagfilter or options.list_tags_by_alphabet or options.list_tags_by_number or options.list_unknown_tags or options.tag_gardening):
        error_exit(5, "Please add at least one file name as argument")

    if options.list_tags_by_alphabet:
        logging.debug("handling option list_tags_by_alphabet")
        list_tags_by_alphabet()

    elif options.list_tags_by_number:
        logging.debug("handling option list_tags_by_number")
        list_tags_by_number()

    elif options.list_unknown_tags:
        logging.debug("handling option list_unknown_tags")
        list_unknown_tags()

    elif options.tag_gardening:
        logging.debug("handling option for tag gardening")
        ## "vocabulary" is a local name of main() which is otherwise only
        ## bound in the interactive branch below. Look it up here, starting
        ## at the current working directory, so that tag gardening does not
        ## fail with an UnboundLocalError:
        vocabulary = sorted(locate_and_parse_controlled_vocabulary(os.getcwdu()))
        handle_tag_gardening(vocabulary)

    elif options.interactive or not options.tags:

        if len(args) < 1 and not options.tagfilter:
            error_exit(5, "Please add at least one file name as argument")

        tags_for_vocabulary = {}
        upto9_tags_for_shortcuts = []

        ## look out for .filetags file and add readline support for tag completion if found with content
        if options.remove:
            ## vocabulary for completing tags is current tags of files
            for currentfile in files:
                ## add tags so that list contains all unique tags:
                for newtag in extract_tags_from_filename(currentfile):
                    add_tag_to_countdict(newtag, tags_for_vocabulary)
            vocabulary = sorted(tags_for_vocabulary.keys())
            upto9_tags_for_shortcuts = sorted(get_upto_nine_keys_of_dict_with_highest_value(tags_for_vocabulary))

        elif options.tagfilter:
            ## vocabulary for completing tags is the tags found in the current working directory
            for tag in get_tags_from_files_and_subfolders(startdir=os.getcwdu(), recursive=False):
                add_tag_to_countdict(tag, tags_for_vocabulary)

            logging.debug('generating vocabulary ...')
            vocabulary = sorted(tags_for_vocabulary.keys())
            upto9_tags_for_shortcuts = sorted(get_upto_nine_keys_of_dict_with_highest_value(tags_for_vocabulary))

        else:
            if files:

                ## shortcuts are derived from the tags found in the directory of the first file
                logging.debug('deriving upto9_tags_for_shortcuts ...')
                upto9_tags_for_shortcuts = sorted(get_upto_nine_keys_of_dict_with_highest_value(get_tags_from_files_and_subfolders(startdir=os.path.dirname(os.path.abspath(files[0])))))
                logging.debug('derived upto9_tags_for_shortcuts')
            vocabulary = sorted(locate_and_parse_controlled_vocabulary(args[0]))
            logging.debug('derived vocabulary with %i entries' % len(vocabulary))

        ## ==================== Interactive asking user for tags ============================= ##
        tags_from_userinput = ask_for_tags(vocabulary, upto9_tags_for_shortcuts)
        ## ==================== Interactive asking user for tags ============================= ##

    else:
        ## non-interactive: extract list of tags
        logging.debug("non-interactive mode: extracting tags from argument ...")

        tags_from_userinput = extract_tags_from_argument(options.tags)

        if not tags_from_userinput:
            ## FIXXME: check: can this be the case?
            logging.info("no tags given, exiting.")
            sys.stdout.flush()
            sys.exit(0)

    logging.debug("tags found: [%s]" % '], ['.join(tags_from_userinput))
    if options.remove:
        logging.info("removing tags \"%s\" ..." % str(BETWEEN_TAG_SEPARATOR.join(tags_from_userinput)))
    elif options.tagfilter:
        logging.info("filtering items with tags \"%s\" in directory \"%s\" ..." %
                     (str(BETWEEN_TAG_SEPARATOR.join(tags_from_userinput)) , str(TAGFILTER_DIRECTORY)))
    else:
        logging.info("adding tags \"%s\" ..." % str(BETWEEN_TAG_SEPARATOR.join(tags_from_userinput)))

    ## tagfilter without file arguments: use the matching files of the current working directory
    if options.tagfilter and not files:
        assert_empty_tagfilter_directory()
        files = filter_files_matching_tags(get_files_of_directory(os.getcwdu()), tags_from_userinput)

    logging.debug("iterate over files ...")
    for filename in files:
        ## byte strings are decoded so that handle_file() gets unicode file names
        if filename.__class__ == str:
            filename = unicode(filename, "UTF-8")
        handle_file(filename, tags_from_userinput, options.remove, options.tagfilter, options.dryrun)

    if options.tagfilter:
        from subprocess import call
        import platform
        ## the viewer is started on Linux only; other platforms are skipped silently
        if platform.system() == 'Linux':
            call([DEFAULT_IMAGE_VIEWER_LINUX, TAGFILTER_DIRECTORY])

    logging.debug("successfully finished.")


if __name__ == "__main__":
    ## run main() only when this file is executed as a script, not when it is imported
    try:
        main()
    except KeyboardInterrupt:

        ## Ctrl-C (e.g., while being asked for tags) is caught here and logged
        logging.info("Received KeyboardInterrupt")

## END OF FILE #################################################################

#end