forked from Github/guess-filename.py
2 additional examples
This commit is contained in:
parent
45797a0414
commit
8d95218dbd
2 changed files with 29 additions and 3 deletions
|
|
@@ -1,6 +1,6 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Time-stamp: <2016-03-08 16:50:42 vk>
|
||||
# Time-stamp: <2016-03-08 17:33:13 vk>
|
||||
|
||||
# TODO:
|
||||
# * fix parts marked with «FIXXME»
|
||||
|
|
@@ -125,6 +125,7 @@ class GuessFilename(object):
|
|||
|
||||
def __init__(self, config, logger):
|
||||
self.logger = logger
|
||||
self.config = config
|
||||
|
||||
def adding_tags(self, tagarray, newtags):
|
||||
"""
|
||||
|
|
@@ -218,6 +219,8 @@ class GuessFilename(object):
|
|||
assert(len(entries) > 0)
|
||||
|
||||
for entry in entries:
|
||||
assert(type(entry) == unicode or type(entry) == str)
|
||||
#logging.debug(u"fuzzy_contains_all_of(%s, %s) ... " % (string[:30], str(entry[:30])))
|
||||
similarity = fuzz.partial_ratio(string, entry)
|
||||
if similarity > 64:
|
||||
#logging.debug(u"MATCH fuzzy_contains_all_of(%s, %s) == %i" % (string, str(entry), similarity))
|
||||
|
|
@@ -279,6 +282,7 @@ class GuessFilename(object):
|
|||
return floatstring
|
||||
else:
|
||||
logging.debug(u"get_euro_charge_from_context was not able to extract a float: [%s] + [%s] + [%s]" % (before, string[:30] + u"...", after))
|
||||
import pdb; pdb.set_trace()
|
||||
return False
|
||||
|
||||
def rename_file(self, dirname, oldbasename, newbasename, dryrun=False, quiet=False):
|
||||
|
|
@@ -376,7 +380,7 @@ class GuessFilename(object):
|
|||
u".pdf"
|
||||
|
||||
# 2015-11-20 Kirchenbeitrag 12,34 EUR -- scan taxes bill.pdf
|
||||
if self.fuzzy_contains_one_of(content, ["4294-0208", "AT086000000007042401", "Kontonachricht"]) and \
|
||||
if self.fuzzy_contains_all_of(content, ["4294-0208", "AT086000000007042401", "Kontonachricht"]) and \
|
||||
datetimestr:
|
||||
floatstr = self.get_euro_charge_from_context(content, "Offen", "Zahlungen")
|
||||
if not floatstr:
|
||||
|
|
@@ -387,6 +391,26 @@ class GuessFilename(object):
|
|||
' '.join(self.adding_tags(tags, ['scan', 'taxes', 'bill'])) + \
|
||||
u".pdf"
|
||||
|
||||
# 2015-11-24 Generali Erhoehung Dynamikklausel - Praemie nun 12,34 - Polizze 12345 -- scan finance.pdf
|
||||
if self.config.GENERALI1_POLIZZE_NUMBER in content and \
|
||||
self.fuzzy_contains_all_of(content, [u"ImHinblickaufdievereinbarteDynamikklauseltritteineWertsteigerunginKraft",
|
||||
u"IhreangepasstePrämiebeträgtdahermonatlich",
|
||||
u"AT44ZZZ00000002054"]) and \
|
||||
datetimestr:
|
||||
floatstr = self.get_euro_charge_from_context(content,
|
||||
"IndiesemBetragistauchdiegesetzlicheVersicherungssteuerenthalten.EUR",
|
||||
"Wird")
|
||||
if not floatstr:
|
||||
floatstr = 'FIXXME'
|
||||
logging.warning(u"Could not parse the charge from file %s - please fix manually" % basename)
|
||||
return datetimestr + \
|
||||
u" Generali Erhoehung Dynamikklausel - Praemie nun " + floatstr + \
|
||||
u"€ - Polizze " + self.config.GENERALI1_POLIZZE_NUMBER + " -- " + \
|
||||
' '.join(self.adding_tags(tags, ['scan', 'bill'])) + \
|
||||
u".pdf"
|
||||
|
||||
|
||||
|
||||
# FIXXME: more file documents
|
||||
import pdb; pdb.set_trace()
|
||||
|
||||
|
|
|
|||
|
|
@@ -1,6 +1,6 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8; mode: python; -*-
|
||||
# Time-stamp: <2016-03-07 14:58:28 vk>
|
||||
# Time-stamp: <2016-03-08 17:14:48 vk>
|
||||
|
||||
import unittest
|
||||
import logging
|
||||
|
|
@@ -130,6 +130,8 @@ class TestGuessFilename(unittest.TestCase):
|
|||
self.assertTrue(self.guess_filename.fuzzy_contains_all_of(u"foo bar baz", [u'xbar']))
|
||||
self.assertFalse(self.guess_filename.fuzzy_contains_all_of(u"foo bar baz", [u'xba', u"12345"]))
|
||||
self.assertFalse(self.guess_filename.fuzzy_contains_all_of(u"foo bar baz", [u'x', u'xba', u'yuio']))
|
||||
self.assertTrue(self.guess_filename.fuzzy_contains_all_of(u"foo\nbar\nbaz42", ['baz42']))
|
||||
self.assertTrue(self.guess_filename.fuzzy_contains_all_of(u"foo€\nbar\nbaz€42", [u'baz€42']))
|
||||
|
||||
def test_fuzzy_contains_one_of(self):
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue