forked from Github/guess-filename.py
added directory handling, fine-tuning of fuzzy, ...
This commit is contained in:
parent
f37199e184
commit
c267a7a8cc
2 changed files with 85 additions and 32 deletions
|
|
@ -1,6 +1,6 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Time-stamp: <2016-03-06 12:12:53 vk>
|
||||
# Time-stamp: <2016-03-06 15:30:41 vk>
|
||||
|
||||
## TODO:
|
||||
## * fix parts marked with «FIXXME»
|
||||
|
|
@ -93,8 +93,6 @@ class GuessFilename(object):
|
|||
Contains methods of the guess filename domain
|
||||
"""
|
||||
|
||||
oldfilename = None
|
||||
|
||||
FILENAME_TAG_SEPARATOR = u' -- '
|
||||
BETWEEN_TAG_SEPARATOR = u' '
|
||||
|
||||
|
|
@ -114,6 +112,11 @@ class GuessFilename(object):
|
|||
EURO_CHARGE_REGEX = re.compile(u"^(.+[-_ ])?(\d+([,.]\d+)?)[-_ ]?(EUR|€)([-_ .].+)?$")
|
||||
EURO_CHARGE_INDEX = 2
|
||||
|
||||
logger = None
|
||||
|
||||
def __init__(self, logger):
|
||||
self.logger = logger
|
||||
|
||||
def adding_tags(self, tagarray, newtags):
|
||||
"""
|
||||
Returns unique array of tags containing the newtag.
|
||||
|
|
@ -134,28 +137,40 @@ class GuessFilename(object):
|
|||
|
||||
return resulting_tags
|
||||
|
||||
def rename_file(self, dirname, oldbasename, newbasename, dryrun=False, quiet=False):
    """
    Renames a file from oldbasename to newbasename in dirname.

    Only simulates result if dryrun is True.

    Nothing is renamed (and False is returned) if both names are equal,
    if the old file is not an existing regular file, or if anything
    already exists under the new name.

    @param dirname: string containing the directory of the file
    @param oldbasename: string containing the old file name (basename, without directory)
    @param newbasename: string containing the new file name (basename, without directory)
    @param dryrun: boolean which defines if files should be changed (False) or not (True)
    @param quiet: boolean; if True, the "old → new" line is not printed to stdout
    @param return: True if the file was renamed (or would have been renamed when dryrun is True), False otherwise
    """

    if oldbasename == newbasename:
        logging.debug("old filename is same as new filename [%s]. Doing nothing." % oldbasename)
        return False

    oldfile = os.path.join(dirname, oldbasename)
    newfile = os.path.join(dirname, newbasename)

    if not os.path.isfile(oldfile):
        logging.error("file to rename does not exist: [%s]" % oldfile)
        return False

    # lexists() instead of isfile(): a directory or a (possibly dangling)
    # symlink with the new name must block the rename as well; otherwise
    # os.rename() would either fail with an exception or silently replace it.
    if os.path.lexists(newfile):
        logging.error("file can't be renamed since new file name already exists: [%s]" % newfile)
        return False

    if not quiet:
        # A single parenthesised argument prints the same text with the
        # Python 2 print statement and with the Python 3 print function.
        print(u" %s → %s" % (oldbasename, newbasename))
    logging.debug(u" renaming \"%s\"" % oldfile)
    logging.debug(u" ⤷ \"%s\"" % newfile)
    if not dryrun:
        # errors from the operating system (e.g. missing permissions)
        # are not caught here and propagate to the caller
        os.rename(oldfile, newfile)
    return True
|
||||
|
||||
def derive_new_filename_from_old_filename(s, oldfilename):
|
||||
"""
|
||||
|
|
@ -166,6 +181,7 @@ class GuessFilename(object):
|
|||
@param return: False or the new filename
|
||||
"""
|
||||
|
||||
logging.debug("derive_new_filename_from_old_filename called")
|
||||
datetimestr, basefilename, tags, extension = s.split_filename_entities(oldfilename)
|
||||
|
||||
if s.contains_one_of(oldfilename, [" A1 ", " a1 "]) and s.has_euro_charge(oldfilename) and datetimestr:
|
||||
|
|
@ -199,8 +215,21 @@ class GuessFilename(object):
|
|||
logging.error("Skipping \"%s\" because this tool only renames existing file names." % oldfilename)
|
||||
return
|
||||
|
||||
self.oldfilename = oldfilename
|
||||
dirname = os.path.abspath(os.path.dirname(oldfilename))
|
||||
logging.debug(u"————→ dirname [%s]" % dirname)
|
||||
basename = os.path.basename(oldfilename)
|
||||
logging.debug(u"————→ basename [%s]" % basename)
|
||||
|
||||
## FIXXME: separate directory from filename
|
||||
|
||||
newfilename = self.derive_new_filename_from_old_filename(basename)
|
||||
|
||||
if newfilename:
|
||||
logging.debug("derive_new_filename_from_old_filename returned new filename: %s" % newfilename)
|
||||
self.rename_file(dirname, basename, newfilename, dryrun)
|
||||
return newfilename
|
||||
|
||||
## FIXXME: try to derive new filename from content
|
||||
|
||||
def split_filename_entities(self, filename):
|
||||
"""
|
||||
|
|
@ -254,9 +283,12 @@ class GuessFilename(object):
|
|||
|
||||
for entry in entries:
|
||||
similarity = fuzz.partial_ratio(string, entry)
|
||||
if similarity > 65:
|
||||
logging.debug("fuzzy_contains_one_of(%s, %s) == %i" % (string, str(entries), similarity))
|
||||
if similarity > 64:
|
||||
#logging.debug(u"MATCH fuzzy_contains_one_of(%s, %s) == %i" % (string, str(entry), similarity))
|
||||
return True
|
||||
else:
|
||||
#logging.debug(u"¬ MATCH fuzzy_contains_one_of(%s, %s) == %i" % (string, str(entry), similarity))
|
||||
pass
|
||||
|
||||
return False
|
||||
|
||||
|
|
@ -305,19 +337,20 @@ def main():
|
|||
error_exit(1, "Options \"--verbose\" and \"--quiet\" found. " +
|
||||
"This does not make any sense, you silly fool :-)")
|
||||
|
||||
if options.dryrun:
|
||||
logging.debug("DRYRUN active, not changing any files")
|
||||
logging.debug("extracting list of files ...")
|
||||
logging.debug("len(args) [%s]" % str(len(args)))
|
||||
|
||||
files = args
|
||||
|
||||
logging.debug("%s filenames found: [%s]" % (str(len(files)), '], ['.join(files)))
|
||||
|
||||
guess_filename = GuessFilename()
|
||||
guess_filename = GuessFilename(logging.getLogger())
|
||||
|
||||
if len(args) < 1:
|
||||
error_exit(5, "Please add at least one file name as argument")
|
||||
|
||||
logging.debug("iterate over files ...")
|
||||
logging.debug("iterating over files ...\n" + "=" * 80)
|
||||
for filename in files:
|
||||
if filename.__class__ == str:
|
||||
filename = unicode(filename, "UTF-8")
|
||||
|
|
|
|||
|
|
@ -1,8 +1,9 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8; mode: python; -*-
|
||||
# Time-stamp: <2016-03-06 12:28:12 vk>
|
||||
# Time-stamp: <2016-03-06 15:30:27 vk>
|
||||
|
||||
import unittest
|
||||
import logging
|
||||
from guessfilename import GuessFilename
|
||||
|
||||
class TestGuessFilename(unittest.TestCase):
|
||||
|
|
@ -10,11 +11,25 @@ class TestGuessFilename(unittest.TestCase):
|
|||
logging = None
|
||||
guess_filename = None
|
||||
|
||||
def handle_logging(self, verbose=False, quiet=False):
    """Log handling and configuration

    Chooses level and message format for logging.basicConfig():
    verbose selects DEBUG and a format that includes the timestamp,
    otherwise quiet selects ERROR, otherwise INFO is used.
    verbose takes precedence over quiet.

    @param verbose: boolean which enables debug output
    @param quiet: boolean which limits the output to errors
    """

    if verbose:
        level = logging.DEBUG
        layout = "%(levelname)-8s %(asctime)-15s %(message)s"
    else:
        level = logging.ERROR if quiet else logging.INFO
        layout = "%(levelname)-8s %(message)s"

    logging.basicConfig(level=level, format=layout)
|
||||
|
||||
|
||||
def setUp(self):
    """Configures verbose logging and creates a fresh GuessFilename object for each test."""

    verbose = True
    quiet = False
    self.handle_logging(verbose, quiet)
    # Hand over a Logger instance (the same way main() does) instead of
    # the logging module itself.
    self.guess_filename = GuessFilename(logging.getLogger())
|
||||
|
||||
def tearDown(self):
    """Nothing to clean up after a test."""
    return None
|
||||
|
|
@ -52,6 +67,7 @@ class TestGuessFilename(unittest.TestCase):
|
|||
self.assertTrue(self.guess_filename.fuzzy_contains_one_of(u"foo bar baz", [u'ba']))
|
||||
self.assertTrue(self.guess_filename.fuzzy_contains_one_of(u"foo bar baz", [u'x', u'ba', u'yuio']))
|
||||
self.assertTrue(self.guess_filename.fuzzy_contains_one_of(u"Kundennummer 1234567890", [u'12345']))
|
||||
self.assertTrue(self.guess_filename.fuzzy_contains_one_of(u"Kundennummer 1234567890", [u'12345']))
|
||||
|
||||
## fuzzy similarities:
|
||||
self.assertTrue(self.guess_filename.fuzzy_contains_one_of(u"foo bar baz", ['xfoo']))
|
||||
|
|
@ -59,17 +75,21 @@ class TestGuessFilename(unittest.TestCase):
|
|||
self.assertTrue(self.guess_filename.fuzzy_contains_one_of(u"foo bar baz", [u'xbar']))
|
||||
self.assertTrue(self.guess_filename.fuzzy_contains_one_of(u"foo bar baz", [u'xba']))
|
||||
self.assertTrue(self.guess_filename.fuzzy_contains_one_of(u"foo bar baz", [u'x', u'xba', u'yuio']))
|
||||
#self.assertTrue(self.guess_filename.fuzzy_contains_one_of(u"Kundennummer 1234567890", [u'1234581388']))
|
||||
#self.assertTrue(self.guess_filename.fuzzy_contains_one_of(u"Rundemummer 1234567890", [u'Rundemummer 1234581388']))
|
||||
#self.assertTrue(self.guess_filename.fuzzy_contains_one_of(u"Rundemummer 1234567890", [u'Rumdemummer 1234581388']))
|
||||
self.assertTrue(self.guess_filename.fuzzy_contains_one_of(u"Kundennummer 1234567890", [u'7234567880']))
|
||||
self.assertTrue(self.guess_filename.fuzzy_contains_one_of(u"Kundennummer 1234567890", [u'Rundemummer 7234567880']))
|
||||
self.assertTrue(self.guess_filename.fuzzy_contains_one_of(u"Kundennummer 1234567890", [u'Rumdemummer 7234567880']))
|
||||
self.assertTrue(self.guess_filename.fuzzy_contains_one_of(u"Kundennummer 1234567890", [u'Rundemummer 1234581388']))
|
||||
self.assertTrue(self.guess_filename.fuzzy_contains_one_of(u"Kundennummer 1234567890", [u'Rumdemummer 1234581388']))
|
||||
|
||||
## fuzzy non-matches:
|
||||
self.assertFalse(self.guess_filename.fuzzy_contains_one_of(u"Kundennummer 1234567890", [u' 345 ']))
|
||||
self.assertFalse(self.guess_filename.fuzzy_contains_one_of(u"Kundennummer 1234567890", [u'1234581388']))
|
||||
self.assertFalse(self.guess_filename.fuzzy_contains_one_of(u"foo bar baz", [u'xyz']))
|
||||
self.assertFalse(self.guess_filename.fuzzy_contains_one_of(u"foo bar baz", [u'111']))
|
||||
self.assertFalse(self.guess_filename.fuzzy_contains_one_of(u"foo bar baz", [u'xby']))
|
||||
self.assertFalse(self.guess_filename.fuzzy_contains_one_of(u"foo bar baz", [u'x', u'yyy', u'yuio']))
|
||||
#self.assertFalse(self.guess_filename.fuzzy_contains_one_of(u"Kundennummer 1234567890", [u'12345', u' 345 ', u'0987654321']))
|
||||
#self.assertFalse(self.guess_filename.fuzzy_contains_one_of(u"Kundennummer 1234567890", [u'12345']))
|
||||
self.assertFalse(self.guess_filename.fuzzy_contains_one_of(u"Kundennummer 1234567890", [u'0987654321']))
|
||||
self.assertFalse(self.guess_filename.fuzzy_contains_one_of(u"Kundennummer 1234567890", [u'Rumdemummer 1234555555']))
|
||||
|
||||
def test_has_euro_charge(self):
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue