def fuzzy_contains_all_of(self, string, entries):
    """
    Returns true, if the string fuzzily contains every one of the entries.

    Each entry is compared to the string with fuzz.partial_ratio(); the
    entry counts as contained only if the reported similarity is strictly
    greater than 64. Evaluation stops at the first entry that does not
    reach that similarity.

    string  -- non-empty text (byte string or unicode string) to search in
    entries -- non-empty list of strings that all have to be found

    Raises AssertionError if an argument has the wrong type or is empty.
    """

    # type(u"") is `unicode` on Python 2 and `str` on Python 3, so this
    # check works on both without referencing the Python-2-only name.
    string_types = (str, type(u""))

    assert isinstance(string, string_types), "string must be a text string"
    assert isinstance(entries, list), "entries must be a list"
    assert len(string) > 0, "string must not be empty"
    assert len(entries) > 0, "entries must not be empty"

    # Similarity an entry has to exceed in order to count as a match.
    min_similarity = 64

    # all() short-circuits on the first entry that is not similar enough.
    return all(fuzz.partial_ratio(string, entry) > min_similarity
               for entry in entries)
def test_fuzzy_contains_all_of(self):
    """
    Checks fuzzy_contains_all_of() against the haystack u"foo bar baz".

    Every case pairs the expected result with the list of entries handed
    to the method; the cases run in the listed order.
    """

    haystack = u"foo bar baz"

    cases = [
        (True, ['foo']),
        (True, [u'foo']),
        (True, [u'bar']),
        (True, [u'ba']),
        (True, [u'foo', u"bar", u"baz"]),
        (False, [u'x', u'ba', u'yuio']),
        (True, ['xfoo']),
        (True, [u'xfoo']),
        (True, [u'xbar']),
        (False, [u'xba', u"12345"]),
        (False, [u'x', u'xba', u'yuio']),
    ]

    for expected, entries in cases:
        result = self.guess_filename.fuzzy_contains_all_of(haystack, entries)
        if expected:
            self.assertTrue(result)
        else:
            self.assertFalse(result)