forked from Github/filetags
added "--hardlinks" and more debug output for scanning large hierarchies
This commit is contained in:
parent
f8f1687dd2
commit
49179d4e3d
2 changed files with 66 additions and 42 deletions
64
README.org
64
README.org
|
|
@ -83,19 +83,19 @@ If you use the GitHub sources (and not pip), the executable is
|
|||
** Usage
|
||||
|
||||
#+BEGIN_SRC sh :results output :wrap src
|
||||
./filetags/__init__.py --help
|
||||
./filetags/__init__.py --help | sed 'sX/home/vkX\$HOMEX'
|
||||
#+END_SRC
|
||||
|
||||
#+BEGIN_src
|
||||
usage: filetags [-h] [-t "STRING WITH TAGS"] [--remove] [-i]
|
||||
[-R] [-s] [-f]
|
||||
[--filebrowser PATH_TO_FILEBROWSER] [--tagtrees]
|
||||
[--tagtrees-handle-no-tag "treeroot" | "ignore" | "FOLDERNAME"]
|
||||
[--tagtrees-link-missing-mutual-tagged-items]
|
||||
[--tagtrees-dir <existing_directory>]
|
||||
[--tagtrees-depth TAGTREES_DEPTH] [--ln] [--la]
|
||||
[--lu] [--tag-gardening] [-v] [-q] [--version]
|
||||
[FILE [FILE ...]]
|
||||
usage: ./filetags/__init__.py [-h] [-t "STRING WITH TAGS"] [--remove] [-i]
|
||||
[-R] [-s] [--hardlinks] [-f]
|
||||
[--filebrowser PATH_TO_FILEBROWSER] [--tagtrees]
|
||||
[--tagtrees-handle-no-tag "treeroot" | "ignore" | "FOLDERNAME"]
|
||||
[--tagtrees-link-missing-mutual-tagged-items]
|
||||
[--tagtrees-dir <existing_directory>]
|
||||
[--tagtrees-depth TAGTREES_DEPTH] [--ln] [--la]
|
||||
[--lu] [--tag-gardening] [-v] [-q] [--version]
|
||||
[FILE [FILE ...]]
|
||||
|
||||
This tool adds or removes simple tags to/from file names.
|
||||
|
||||
|
|
@ -131,32 +131,35 @@ positional arguments:
|
|||
optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
-t "STRING WITH TAGS", --tags "STRING WITH TAGS"
|
||||
one or more tags (in quotes, separated by spaces) to
|
||||
One or more tags (in quotes, separated by spaces) to
|
||||
add/remove
|
||||
--remove remove tags from (instead of adding to) file name(s)
|
||||
-i, --interactive interactive mode: ask for (a)dding or (r)emoving and
|
||||
--remove Remove tags from (instead of adding to) file name(s)
|
||||
-i, --interactive Interactive mode: ask for (a)dding or (r)emoving and
|
||||
name of tag(s)
|
||||
-R, --recursive recursively go through the current directory and all
|
||||
-R, --recursive Recursively go through the current directory and all
|
||||
of its subdirectories. Implemented for --tag-gardening
|
||||
and --tagtrees
|
||||
-s, --dryrun enable dryrun mode: just simulate what would happen,
|
||||
-s, --dryrun Enable dryrun mode: just simulate what would happen,
|
||||
do not modify files
|
||||
-f, --filter ask for list of tags and generate links in
|
||||
"/home/USER/.filetags_tagfilter" containing links to all
|
||||
--hardlinks Use hard links instead of symbolic links. This is
|
||||
ignored on Windows systems. Note that renaming link
|
||||
originals when tagging does not work with hardlinks.
|
||||
-f, --filter Ask for list of tags and generate links in
|
||||
"$HOME/.filetags_tagfilter" containing links to all
|
||||
files with matching tags and start the filebrowser.
|
||||
Target directory can be overridden by --tagtrees-dir.
|
||||
--filebrowser PATH_TO_FILEBROWSER
|
||||
use this option to override the tool to view/manage
|
||||
Use this option to override the tool to view/manage
|
||||
files (for --filter; default: geeqie). Use "none" to
|
||||
omit the default one.
|
||||
--tagtrees This generates nested directories in
|
||||
"/home/vk/.filetags_tagfilter" for each combination of
|
||||
"$HOME/.filetags_tagfilter" for each combination of
|
||||
tags up to a limit of 2. Target directory can be
|
||||
overridden by --tagtrees-dir. Please note that this
|
||||
may take long since it relates exponentially to the
|
||||
number of tags involved. See also http://Karl-
|
||||
Voit.at/tagstore/ and http://Karl-
|
||||
Voit.at/tagstore/downloads/Voit2012b.pdf
|
||||
number of tags involved. Can be combined with
|
||||
--filter. See also http://Karl-Voit.at/tagstore/ and
|
||||
http://Karl-Voit.at/tagstore/downloads/Voit2012b.pdf
|
||||
--tagtrees-handle-no-tag "treeroot" | "ignore" | "FOLDERNAME"
|
||||
When tagtrees are created, this parameter defines how
|
||||
to handle items that got no tag at all. The value
|
||||
|
|
@ -178,7 +181,7 @@ optional arguments:
|
|||
--tagtrees-dir <existing_directory>
|
||||
When tagtrees are created, this parameter overrides
|
||||
the default target directory
|
||||
"/home/vk/.filetags_tagfilter" with a user-defined
|
||||
"$HOME/.filetags_tagfilter" with a user-defined
|
||||
one. It has to be an empty directory or a non-existing
|
||||
directory which will be created. This also overrides
|
||||
the default directory for --filter.
|
||||
|
|
@ -191,24 +194,24 @@ optional arguments:
|
|||
instead of symbolic links) the performance is really
|
||||
slow. Choose wisely.
|
||||
--ln, --list-tags-by-number
|
||||
list all file-tags sorted by their number of use
|
||||
List all file-tags sorted by their number of use
|
||||
--la, --list-tags-by-alphabet
|
||||
list all file-tags sorted by their name
|
||||
List all file-tags sorted by their name
|
||||
--lu, --list-tags-unknown-to-vocabulary
|
||||
list all file-tags which are found in file names but
|
||||
List all file-tags which are found in file names but
|
||||
are not part of .filetags
|
||||
--tag-gardening This is for getting an overview on tags that might
|
||||
require to be renamed (typos, singular/plural, ...).
|
||||
See also http://www.webology.org/2008/v5n3/a58.html
|
||||
-v, --verbose enable verbose mode
|
||||
-q, --quiet enable quiet mode
|
||||
--version display version and exit
|
||||
-v, --verbose Enable verbose mode
|
||||
-q, --quiet Enable quiet mode
|
||||
--version Display version and exit
|
||||
|
||||
:copyright: (c) by Karl Voit <tools@Karl-Voit.at>
|
||||
:license: GPL v3 or any later version
|
||||
:URL: https://github.com/novoid/filetags
|
||||
:bugreports: via github or <tools@Karl-Voit.at>
|
||||
:version: 2018-04-25
|
||||
:version: 2018-08-02
|
||||
·
|
||||
#+END_src
|
||||
|
||||
|
|
@ -322,6 +325,7 @@ For =--filter= and =--tagtrees= examples see sections below.
|
|||
- added hints to [[https://github.com/novoid/integratethis][=integratethis=]] to ease the Windows Explorer
|
||||
integration
|
||||
- 2018-07-23: =--tagtrees= can now be filtered with =--filter=
|
||||
- 2018-08-02: added option =--hardlinks= as an alternative for non-Windows systems
|
||||
|
||||
** Get the most out of filetags: controlled vocabulary ~.filetags~
|
||||
:PROPERTIES:
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
PROG_VERSION = "Time-stamp: <2018-08-02 20:38:00 vk>"
|
||||
PROG_VERSION = "Time-stamp: <2018-08-02 21:15:14 vk>"
|
||||
|
||||
# TODO:
|
||||
# - fix parts marked with «FIXXME»
|
||||
|
|
@ -176,6 +176,10 @@ parser.add_argument("-R", "--recursive", dest="recursive", action="store_true",
|
|||
parser.add_argument("-s", "--dryrun", dest="dryrun", action="store_true",
|
||||
help="Enable dryrun mode: just simulate what would happen, do not modify files")
|
||||
|
||||
parser.add_argument("--hardlinks", dest="hardlinks", action="store_true",
|
||||
help="Use hard links instead of symbolic links. This is ignored on Windows systems. " +
|
||||
"Note that renaming link originals when tagging does not work with hardlinks.")
|
||||
|
||||
parser.add_argument("-f", "--filter", dest="tagfilter", action="store_true",
|
||||
help="Ask for list of tags and generate links in \"" + TAGFILTER_DIRECTORY + "\" " +
|
||||
"containing links to all files with matching tags and start the filebrowser. " +
|
||||
|
|
@ -960,8 +964,7 @@ def handle_file_and_optional_link(orig_filename, tags, do_remove, do_filter, dry
|
|||
|
||||
|
||||
def create_link(source, destination):
|
||||
"""
|
||||
On non-Windows systems, a symbolic link is created that links
|
||||
"""On non-Windows systems, a symbolic link is created that links
|
||||
source (existing file) to destination (the new symlink). On
|
||||
Windows systems a lnk-file is created instead.
|
||||
|
||||
|
|
@ -974,8 +977,12 @@ def create_link(source, destination):
|
|||
This is the reason why the "--tagtrees" option performs really badly
|
||||
on Windows. And "really bad" means factor 10 to 1000. I measured it.
|
||||
|
||||
The command line option "--hardlinks" switches to hardlinks. This
|
||||
is ignored on Windows systems.
|
||||
|
||||
@param source: a file name of the source, an existing file
|
||||
@param destination: a file name for the link which is about to be created
|
||||
|
||||
"""
|
||||
|
||||
logging.debug('create_link(' + source + ', ' + destination + ') called')
|
||||
|
|
@ -994,8 +1001,16 @@ def create_link(source, destination):
|
|||
shortcut.save()
|
||||
|
||||
else:
|
||||
# for normal operating systems, use good old high-performing symbolic links:
|
||||
os.symlink(source, destination)
|
||||
# for normal operating systems:
|
||||
if options.hardlinks:
|
||||
try:
|
||||
# use good old high-performing hard links:
|
||||
os.link(source, destination)
|
||||
except OSError:
|
||||
logging.warning('Due to cross-device links, I had to use a symbolic link as a fall-back for: ' + source)
|
||||
else:
|
||||
# use good old high-performing symbolic links:
|
||||
os.symlink(source, destination)
|
||||
|
||||
|
||||
def handle_file(orig_filename, tags, do_remove, do_filter, dryrun):
|
||||
|
|
@ -1223,12 +1238,12 @@ def get_tags_from_files_and_subfolders(startdir=os.getcwd(), use_cache=True):
|
|||
(startdir, str(len(list(cache_of_tags_by_folder.keys())))))
|
||||
|
||||
if use_cache and startdir in list(cache_of_tags_by_folder.keys()):
|
||||
logging.debug("found " + str(len(cache_of_tags_by_folder[startdir])) +
|
||||
logging.debug("get_tags_from_files_and_subfolders: found " + str(len(cache_of_tags_by_folder[startdir])) +
|
||||
" tags in cache for directory: " + startdir)
|
||||
return cache_of_tags_by_folder[startdir]
|
||||
|
||||
elif use_cache and startdir in cache_of_files_with_metadata.keys():
|
||||
logging.debug('using cache_of_files_with_metadata instead of traversing file system again')
|
||||
logging.debug('get_tags_from_files_and_subfolders: using cache_of_files_with_metadata instead of traversing file system again')
|
||||
cachedata = cache_of_files_with_metadata[startdir]
|
||||
|
||||
# FIXXME: check if tags are extracted from dirnames as in traversal algorithm below
|
||||
|
|
@ -1258,7 +1273,7 @@ def get_tags_from_files_and_subfolders(startdir=os.getcwd(), use_cache=True):
|
|||
options.tag_gardening)):
|
||||
break # do not loop
|
||||
|
||||
logging.debug("Writing " + str(len(list(tags.keys()))) +
|
||||
logging.debug("get_tags_from_files_and_subfolders: Writing " + str(len(list(tags.keys()))) +
|
||||
" tags in cache for directory: " + startdir)
|
||||
if use_cache:
|
||||
cache_of_tags_by_folder[startdir] = tags
|
||||
|
|
@ -1921,12 +1936,17 @@ def get_files_of_directory(directory):
|
|||
"""
|
||||
|
||||
files = []
|
||||
logging.debug('get_files_of_directory(' + directory + ') called and traversing file system ...')
|
||||
for (dirpath, dirnames, filenames) in os.walk(directory):
|
||||
if len(files) % 5000 == 0 and len(files) > 0:
|
||||
# while debugging a large hierarchy scan, I'd like to print out some stuff in-between scanning
|
||||
logging.info('found ' + str(len(files)) + ' files so far ... counting ...')
|
||||
if options.recursive:
|
||||
files.extend([os.path.join(dirpath, x) for x in filenames])
|
||||
else:
|
||||
files.extend(filenames)
|
||||
break
|
||||
logging.debug('get_files_of_directory(' + directory + ') finished with ' + str(len(files)) + ' items')
|
||||
|
||||
return files
|
||||
|
||||
|
|
@ -2086,6 +2106,7 @@ def generate_tagtrees(directory, maxdepth, ignore_nontagged, nontagged_subdir, l
|
|||
'even simple to find and catch while testing for me either. Or was it? Make an educated guess. :-)')
|
||||
|
||||
if filtertags:
|
||||
logging.debug('generate_tagtrees: filtering tags ...')
|
||||
files = filter_files_matching_tags(files, filtertags)
|
||||
|
||||
if len(files) == 0 and not options.recursive:
|
||||
|
|
@ -2099,7 +2120,7 @@ def generate_tagtrees(directory, maxdepth, ignore_nontagged, nontagged_subdir, l
|
|||
controlled_vocabulary_filename = locate_file_in_cwd_and_parent_directories(os.getcwd(),
|
||||
CONTROLLED_VOCABULARY_FILENAME)
|
||||
if controlled_vocabulary_filename:
|
||||
logging.debug('I found controlled_vocabulary_filename "' +
|
||||
logging.debug('generate_tagtrees: I found controlled_vocabulary_filename "' +
|
||||
controlled_vocabulary_filename +
|
||||
'" which I\'m going to link to the tagtrees folder')
|
||||
if not options.dryrun:
|
||||
|
|
@ -2108,7 +2129,7 @@ def generate_tagtrees(directory, maxdepth, ignore_nontagged, nontagged_subdir, l
|
|||
CONTROLLED_VOCABULARY_FILENAME))
|
||||
|
||||
else:
|
||||
logging.debug('I did not find a controlled_vocabulary_filename')
|
||||
logging.debug('generate_tagtrees: I did not find a controlled_vocabulary_filename')
|
||||
|
||||
logging.info('Creating tagtrees and their links. It may take a while … ' +
|
||||
'(exponentially with respect to number of tags)')
|
||||
|
|
@ -2142,7 +2163,6 @@ def generate_tagtrees(directory, maxdepth, ignore_nontagged, nontagged_subdir, l
|
|||
filename, dirname, \
|
||||
basename, basename_without_lnk = split_up_filename(currentfile[1])
|
||||
|
||||
|
||||
logging.debug('generate_tagtrees: handling file "' + filename + '" …')
|
||||
|
||||
if len(tags_of_currentfile) == 0:
|
||||
|
|
@ -2224,7 +2244,7 @@ def generate_tagtrees(directory, maxdepth, ignore_nontagged, nontagged_subdir, l
|
|||
no_uniqueset_tag_found_dir = os.path.join(directory,
|
||||
'no-' + ("-").join(unique_tagset)) # example: "no-draft-final"
|
||||
if not os.path.isdir(no_uniqueset_tag_found_dir):
|
||||
logging.debug('creating non-existent no_uniqueset_tag_found_dir "%s" ...' %
|
||||
logging.debug('generate_tagtrees: creating non-existent no_uniqueset_tag_found_dir "%s" ...' %
|
||||
str(no_uniqueset_tag_found_dir))
|
||||
if not options.dryrun:
|
||||
os.makedirs(no_uniqueset_tag_found_dir)
|
||||
|
|
|
|||
Loading…
Reference in a new issue