forked from Github/guess-filename.py
fuzzy_contains_all_of
This commit is contained in:
parent
d75416db96
commit
cc615cff3a
2 changed files with 47 additions and 2 deletions
|
|
@ -1,6 +1,6 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Time-stamp: <2016-03-06 19:41:42 vk>
|
||||
# Time-stamp: <2016-03-07 13:47:31 vk>
|
||||
|
||||
# TODO:
|
||||
# * fix parts marked with «FIXXME»
|
||||
|
|
@ -206,6 +206,27 @@ class GuessFilename(object):
|
|||
|
||||
return False
|
||||
|
||||
def fuzzy_contains_all_of(self, string, entries):
    """
    Returns True if every entry of entries is approximately contained in string.

    Each entry is scored against string with fuzz.partial_ratio(). An entry
    counts as contained when its score is greater than 64, so entries that
    differ slightly from the text can still match.

    string  -- non-empty str or unicode text that is searched
    entries -- non-empty list of strings which all have to be found

    Returns False as soon as one entry scores 64 or lower, otherwise True.
    Raises AssertionError if an argument has the wrong type or is empty.
    """

    # isinstance() instead of comparing type() objects: subclasses of
    # str/unicode/list are valid input as well.
    assert isinstance(string, (str, unicode))
    assert isinstance(entries, list)
    assert len(string) > 0
    assert len(entries) > 0

    for entry in entries:
        similarity = fuzz.partial_ratio(string, entry)
        # 64 is the cut-off between "similar enough" and "no match";
        # a single entry below it decides the overall result.
        if similarity <= 64:
            return False

    return True
|
||||
|
||||
def has_euro_charge(self, string):
|
||||
"""
|
||||
Returns true, if the string contains a number with a €-currency
|
||||
|
|
@ -331,6 +352,16 @@ class GuessFilename(object):
|
|||
' '.join(self.adding_tags(tags, ['scan', 'finance', 'private'])) + \
|
||||
u".pdf"
|
||||
|
||||
# 2015-11-20 easybank - neue TAN-Liste -- scan finance private.pdf
|
||||
if self.fuzzy_contains_one_of(content, ["4294-0208"]) and \
|
||||
self.fuzzy_contains_one_of(content, ["AT086000000007042401"]) and \
|
||||
self.fuzzy_contains_one_of(content, ["Kontonachricht"]) and \
|
||||
datetimestr:
|
||||
return datetimestr + \
|
||||
u" easybank - neue TAN-Liste -- " + \
|
||||
' '.join(self.adding_tags(tags, ['scan', 'finance', 'private'])) + \
|
||||
u".pdf"
|
||||
|
||||
# FIXXME: more file documents
|
||||
import pdb; pdb.set_trace()
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8; mode: python; -*-
|
||||
# Time-stamp: <2016-03-06 18:58:52 vk>
|
||||
# Time-stamp: <2016-03-07 13:52:10 vk>
|
||||
|
||||
import unittest
|
||||
import logging
|
||||
|
|
@ -117,6 +117,20 @@ class TestGuessFilename(unittest.TestCase):
|
|||
self.assertFalse(self.guess_filename.contains_one_of(u"foo bar baz", [u'xba']))
|
||||
self.assertFalse(self.guess_filename.contains_one_of(u"foo bar baz", [u'x', u'xba', u'yuio']))
|
||||
|
||||
def test_fuzzy_contains_all_of(self):
    """
    Checks fuzzy_contains_all_of() on the text u"foo bar baz" with exact,
    near-miss and non-matching entry lists.
    """

    text = u"foo bar baz"

    # (expected result, entries), evaluated in the listed order
    cases = [
        (True, ['foo']),
        (True, [u'foo']),
        (True, [u'bar']),
        (True, [u'ba']),
        (True, [u'foo', u"bar", u"baz"]),
        (False, [u'x', u'ba', u'yuio']),
        (True, ['xfoo']),
        (True, [u'xfoo']),
        (True, [u'xbar']),
        (False, [u'xba', u"12345"]),
        (False, [u'x', u'xba', u'yuio']),
    ]

    for expected, entries in cases:
        # keep the same assertion kind per case: assertTrue for expected
        # matches, assertFalse for expected misses
        check = self.assertTrue if expected else self.assertFalse
        check(self.guess_filename.fuzzy_contains_all_of(text, entries))
|
||||
|
||||
def test_fuzzy_contains_one_of(self):
|
||||
|
||||
# comparing exact strings:
|
||||
|
|
|
|||
Loading…
Reference in a new issue