added pattern matching for android screenshots

This commit is contained in:
Karl Voit 2016-12-28 14:10:54 +01:00
parent 2694d786f2
commit bbd14c38c8

View file

@ -1,6 +1,6 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Time-stamp: <2016-04-08 16:32:33 vk>
# Time-stamp: <2016-12-28 14:10:43 vk>
# TODO:
# * add -i (interactive) where user gets asked if renaming should be done (per file)
@ -109,21 +109,26 @@ class GuessFilename(object):
# file names containing tags matches following regular expression
# ( (date(time)?)?(--date(time)?)? )? filename (tags)? (extension)?
DAY_REGEX = "[12]\d{3}-?[01]\d-?[0123]\d" # note: I made the dashes between optional to match simpler format as well
TIME_REGEX = "T[012]\d.[012345]\d(.[012345]\d)?"
DAYTIME_REGEX = "(" + DAY_REGEX + "(" + TIME_REGEX + ")?)"
DAYTIME_DURATION_REGEX = DAYTIME_REGEX + "(--?" + DAYTIME_REGEX + ")?"
DAY_REGEX = '[12]\d{3}-?[01]\d-?[0123]\d' # note: I made the dashes between optional to match simpler format as well
TIME_REGEX = 'T[012]\d.[012345]\d(.[012345]\d)?'
ISO_NAME_TAGS_EXTENSION_REGEX = re.compile("((" + DAYTIME_DURATION_REGEX + ")[ -_])?(.+?)(" + FILENAME_TAG_SEPARATOR + "((\w+[" + BETWEEN_TAG_SEPARATOR + "]?)+))?(\.(\w+))?$")
DAYTIME_REGEX = '(' + DAY_REGEX + '(' + TIME_REGEX + ')?)'
DAYTIME_DURATION_REGEX = DAYTIME_REGEX + '(--?' + DAYTIME_REGEX + ')?'
ISO_NAME_TAGS_EXTENSION_REGEX = re.compile('((' + DAYTIME_DURATION_REGEX + ')[ -_])?(.+?)(' + FILENAME_TAG_SEPARATOR + '((\w+[' + BETWEEN_TAG_SEPARATOR + ']?)+))?(\.(\w+))?$')
DAYTIME_DURATION_INDEX = 2
NAME_INDEX = 10
TAGS_INDEX = 12
EXTENSION_INDEX = 15
RAW_EURO_CHARGE_REGEX = u"(\d+([,.]\d+)?)[-_ ]?(EUR|€)"
EURO_CHARGE_REGEX = re.compile(u"^(.+[-_ ])?" + RAW_EURO_CHARGE_REGEX + "([-_ .].+)?$")
RAW_EURO_CHARGE_REGEX = u'(\d+([,.]\d+)?)[-_ ]?(EUR|€)'
EURO_CHARGE_REGEX = re.compile(u'^(.+[-_ ])?' + RAW_EURO_CHARGE_REGEX + '([-_ .].+)?$')
EURO_CHARGE_INDEX = 2
ANDROID_SCREENSHOT_REGEX = 'Screenshot_([12]\d{3})-?([01]\d)-?([0123]\d)' + '-?' + \
'([012]\d).?([012345]\d)(.?([012345]\d))?' + '( .*)?.png'
ANDROID_SCREENSHOT_INDEXGROUPS = [1, '-', 2, '-', 3, 'T', 4, '.', 5, '.', 7, 8, ' -- screenshots android.png']
logger = None
config = None
@ -358,6 +363,67 @@ class GuessFilename(object):
os.rename(oldfile, newfile)
return True
def build_string_via_indexgroups(self, regex_match, indexgroups):
"""This function takes a regex_match object and concatenates its
groups. It does this by traversing the list of indexgroups. If
the list item is an integer, the corresponding
regex_match.group() is appended to the result string. If the
list item is a string, the string is appended to the result
string.
When a list item is a list, its elements are appended as well as
long as all list items exist.
match-groups that are in the indexgroups but are None are ignored.
@param regex_match: a regex match object from re.match(REGEX, STRING)
@param indexgroups: list of strings and integers like [1, '-', 2, '-', 3, 'T', 4, '.', 5, ' foo .png']
@param return: string containing the concatenated string
"""
if not regex_match:
return "ERROR: no re.match object found"
def append_element(string, indexgroups):
result = string
for element in indexgroups:
if type(element) == str:
result += element
#print 'DEBUG: result after element [' + str(element) + '] = [' + str(result) + ']'
elif type(element) == int:
potential_element = regex_match.group(element)
# ignore None matches
if potential_element:
result += regex_match.group(element)
#print 'DEBUG: result after element [' + str(element) + '] = [' + str(result) + ']'
else:
#print 'DEBUG: match-group element ' + str(element) + ' is None'
pass
elif type(element) == list:
# recursive: if a list element is a list, process if all elements exists:
#print 'DEBUG: found list item = ' + str(element)
#print 'DEBUG: result before = [' + str(result) + ']'
all_found = True
for listelement in element:
if type(listelement) == int and (regex_match.group(listelement) is None or
len(regex_match.group(listelement)) <1):
all_found = False
if all_found:
result = append_element(result, element)
#print 'DEBUG: result after = [' + str(result) + ']'
else:
pass
#print 'DEBUG: result after = [' + str(result) + ']' + \
# ' -> not changed because one or more elements of sub-list were not found'
return result
#print 'DEBUG: ' + 'v' * 50 + '\n' + 'DEBUG: FILE: ' + str(regex_match.group(0))
#print 'DEBUG: GROUPS: ' + str(regex_match.groups())
result = append_element(u'', indexgroups)
#print 'DEBUG: ' + '^' * 50
return result
def derive_new_filename_from_old_filename(self, oldfilename):
"""
Analyses the old filename and returns a new one if feasible.
@ -370,6 +436,11 @@ class GuessFilename(object):
logging.debug("derive_new_filename_from_old_filename called")
datetimestr, basefilename, tags, extension = self.split_filename_entities(oldfilename)
# Screenshot_2013-03-05-08-14-09.png -> 2013-03-05T08-14-09 -- android screenshots.png
regex_match = re.match(self.ANDROID_SCREENSHOT_REGEX, oldfilename)
if regex_match:
return self.build_string_via_indexgroups(regex_match, self.ANDROID_SCREENSHOT_INDEXGROUPS)
# 2015-11-24 Rechnung A1 Festnetz-Internet 12,34€ -- scan bill.pdf
if self.contains_one_of(oldfilename, [" A1 ", " a1 "]) and self.has_euro_charge(oldfilename) and datetimestr:
return datetimestr + \
@ -401,7 +472,7 @@ class GuessFilename(object):
# 2016-02-26 Gehaltszettel Februar 12,34 EUR -- scan infonova.pdf
if self.contains_all_of(oldfilename, ["Gehalt", "infonova"]) and self.has_euro_charge(oldfilename) and datetimestr:
return datetimestr + \
u" Gehaltszettel Februar " + self.get_euro_charge(oldfilename) + \
u" Gehaltszettel " + self.get_euro_charge(oldfilename) + \
u"€ -- " + ' '.join(self.adding_tags(tags, ['scan', 'infonova'])) + \
u".pdf"