fuzzy_contains_one_of

This commit is contained in:
Karl Voit 2016-03-06 12:28:55 +01:00
parent ffe08ff4d0
commit 73c0c2f9e0
3 changed files with 56 additions and 5 deletions

1
.gitignore vendored
View file

@ -1,3 +1,4 @@
*.pyc
*.pdf
/flycheck_guessfilename.py
/flycheck_guessfilename_test.py

View file

@ -1,6 +1,6 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Time-stamp: <2016-03-05 23:43:01 vk>
# Time-stamp: <2016-03-06 12:12:53 vk>
## TODO:
## * fix parts marked with «FIXXME»
@ -18,6 +18,7 @@ import os.path
import time
import logging
from optparse import OptionParser
from fuzzywuzzy import fuzz # for fuzzy comparison of strings
PROG_VERSION_NUMBER = u"0.1"
PROG_VERSION_DATE = u"2016-03-04"
@ -199,7 +200,7 @@ class GuessFilename(object):
return
self.oldfilename = oldfilename
def split_filename_entities(self, filename):
"""
@ -241,6 +242,24 @@ class GuessFilename(object):
return False
def fuzzy_contains_one_of(self, string, entries, threshold=65):
    """
    Returns True if the string contains something similar to at least one
    of the strings within the entries list, False otherwise.

    Similarity is computed with fuzz.partial_ratio(); an entry counts as a
    match when its score is strictly greater than threshold.

    string    ... non-empty text (byte string or unicode) to search in
    entries   ... non-empty list of candidate strings
    threshold ... score that has to be exceeded for a match (default: 65,
                  the value that was previously hard-coded)
    """

    # type(u"") is `unicode` on Python 2 and `str` on Python 3, so this
    # check works on both without referencing the Python-2-only name.
    assert(isinstance(string, (str, type(u""))))
    assert(isinstance(entries, list))
    assert(len(string) > 0)
    assert(len(entries) > 0)

    for entry in entries:
        similarity = fuzz.partial_ratio(string, entry)
        if similarity > threshold:
            # Lazy %-args: the message is only built if DEBUG is enabled.
            logging.debug("fuzzy_contains_one_of(%s, %s) == %i",
                          string, str(entries), similarity)
            return True

    return False
def has_euro_charge(self, string):
"""
Returns true, if the string contains a number with a -currency

View file

@ -1,6 +1,6 @@
#!/usr/bin/env python
# -*- coding: utf-8; mode: python; -*-
# Time-stamp: <2016-03-05 23:38:58 vk>
# Time-stamp: <2016-03-06 12:28:12 vk>
import unittest
from guessfilename import GuessFilename
@ -8,10 +8,13 @@ from guessfilename import GuessFilename
class TestGuessFilename(unittest.TestCase):
logging = None
guess_filename = GuessFilename()
guess_filename = None
def setUp(self):
    """
    Creates a fresh GuessFilename instance for every test and switches
    its verbose attribute on.
    """
    self.guess_filename = GuessFilename()
    self.guess_filename.verbose = True
def tearDown(self):
    """Nothing to clean up after a test; intentionally a no-op."""
@ -40,6 +43,34 @@ class TestGuessFilename(unittest.TestCase):
self.assertFalse(self.guess_filename.contains_one_of(u"foo bar baz", [u'xba']))
self.assertFalse(self.guess_filename.contains_one_of(u"foo bar baz", [u'x', u'xba', u'yuio']))
def test_fuzzy_contains_one_of(self):
    """
    Checks fuzzy_contains_one_of() against exact sub-strings, near
    matches and clear non-matches.
    """
    check = self.guess_filename.fuzzy_contains_one_of
    text = u"foo bar baz"

    ## comparing exact strings:
    exact_matches = (
        (text, ['foo']),
        (text, [u'foo']),
        (text, [u'bar']),
        (text, [u'ba']),
        (text, [u'x', u'ba', u'yuio']),
        (u"Kundennummer 1234567890", [u'12345']),
    )
    for haystack, candidates in exact_matches:
        self.assertTrue(check(haystack, candidates))

    ## fuzzy similarities:
    similar_matches = (
        (text, ['xfoo']),
        (text, [u'xfoo']),
        (text, [u'xbar']),
        (text, [u'xba']),
        (text, [u'x', u'xba', u'yuio']),
        ## disabled cases, kept for reference:
        #(u"Kundennummer 1234567890", [u'1234581388']),
        #(u"Rundemummer 1234567890", [u'Rundemummer 1234581388']),
        #(u"Rundemummer 1234567890", [u'Rumdemummer 1234581388']),
    )
    for haystack, candidates in similar_matches:
        self.assertTrue(check(haystack, candidates))

    ## fuzzy non-matches:
    non_matches = (
        (text, [u'xyz']),
        (text, [u'111']),
        (text, [u'xby']),
        (text, [u'x', u'yyy', u'yuio']),
        ## disabled cases, kept for reference:
        #(u"Kundennummer 1234567890", [u'12345', u' 345 ', u'0987654321']),
        #(u"Kundennummer 1234567890", [u'12345']),
    )
    for haystack, candidates in non_matches:
        self.assertFalse(check(haystack, candidates))
def test_has_euro_charge(self):
self.assertTrue(self.guess_filename.has_euro_charge(u"12,34EUR"))