mirror of
https://github.com/novoid/guess-filename.py.git
synced 2026-02-16 13:24:15 +00:00
fuzzy_contains_one_of
This commit is contained in:
parent
ffe08ff4d0
commit
73c0c2f9e0
3 changed files with 56 additions and 5 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -1,3 +1,4 @@
|
|||
*.pyc
|
||||
*.pdf
|
||||
/flycheck_guessfilename.py
|
||||
/flycheck_guessfilename_test.py
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Time-stamp: <2016-03-05 23:43:01 vk>
|
||||
# Time-stamp: <2016-03-06 12:12:53 vk>
|
||||
|
||||
## TODO:
|
||||
## * fix parts marked with «FIXXME»
|
||||
|
|
@ -18,6 +18,7 @@ import os.path
|
|||
import time
|
||||
import logging
|
||||
from optparse import OptionParser
|
||||
from fuzzywuzzy import fuzz # for fuzzy comparison of strings
|
||||
|
||||
PROG_VERSION_NUMBER = u"0.1"
|
||||
PROG_VERSION_DATE = u"2016-03-04"
|
||||
|
|
@ -199,7 +200,7 @@ class GuessFilename(object):
|
|||
return
|
||||
|
||||
self.oldfilename = oldfilename
|
||||
|
||||
|
||||
|
||||
def split_filename_entities(self, filename):
|
||||
"""
|
||||
|
|
@ -241,6 +242,24 @@ class GuessFilename(object):
|
|||
|
||||
return False
|
||||
|
||||
def fuzzy_contains_one_of(self, string, entries, threshold=65):
    """
    Returns True if string fuzzily contains at least one of the strings
    within the entries list, False otherwise.

    Every entry is scored against string with fuzz.partial_ratio(). The
    first entry whose score is strictly greater than threshold ends the
    search with True. The default threshold of 65 is the value that was
    previously hard-coded.

    string has to be a non-empty str or unicode object and entries has
    to be a non-empty list; this is checked with assert statements.
    """

    # isinstance() instead of an exact type comparison, so that
    # subclasses of str/unicode/list are accepted as well:
    assert(isinstance(string, (unicode, str)))
    assert(isinstance(entries, list))
    assert(len(string) > 0)
    assert(len(entries) > 0)

    for entry in entries:
        similarity = fuzz.partial_ratio(string, entry)
        if similarity > threshold:
            # The values are handed over as logging arguments so that the
            # message only gets formatted when debug output is enabled:
            logging.debug("fuzzy_contains_one_of(%s, %s) == %i", string, str(entries), similarity)
            return True

    return False
|
||||
|
||||
def has_euro_charge(self, string):
|
||||
"""
|
||||
Returns true, if the string contains a number with a €-currency
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8; mode: python; -*-
|
||||
# Time-stamp: <2016-03-05 23:38:58 vk>
|
||||
# Time-stamp: <2016-03-06 12:28:12 vk>
|
||||
|
||||
import unittest
|
||||
from guessfilename import GuessFilename
|
||||
|
|
@ -8,10 +8,13 @@ from guessfilename import GuessFilename
|
|||
class TestGuessFilename(unittest.TestCase):
|
||||
|
||||
logging = None
|
||||
guess_filename = GuessFilename()
|
||||
guess_filename = None
|
||||
|
||||
def setUp(self):
    """
    Test fixture: stores a new GuessFilename instance in
    self.guess_filename and sets its verbose attribute to True.
    """
    self.guess_filename = GuessFilename()
    self.guess_filename.verbose = True
|
||||
|
||||
def tearDown(self):
    """
    Test fixture clean-up hook; intentionally does nothing.
    """
    return None
|
||||
|
|
@ -40,6 +43,34 @@ class TestGuessFilename(unittest.TestCase):
|
|||
self.assertFalse(self.guess_filename.contains_one_of(u"foo bar baz", [u'xba']))
|
||||
self.assertFalse(self.guess_filename.contains_one_of(u"foo bar baz", [u'x', u'xba', u'yuio']))
|
||||
|
||||
def test_fuzzy_contains_one_of(self):
    """
    Checks fuzzy_contains_one_of() against entries that occur verbatim
    in the string, entries that are only similar, and entries that are
    expected not to match.
    """
    haystack = u"foo bar baz"
    fuzzy_match = self.guess_filename.fuzzy_contains_one_of

    ## comparing exact strings:
    verbatim_entries = (
        ['foo'],
        [u'foo'],
        [u'bar'],
        [u'ba'],
        [u'x', u'ba', u'yuio'],
    )
    for candidates in verbatim_entries:
        self.assertTrue(fuzzy_match(haystack, candidates))
    self.assertTrue(fuzzy_match(u"Kundennummer 1234567890", [u'12345']))

    ## fuzzy similarities:
    similar_entries = (
        ['xfoo'],
        [u'xfoo'],
        [u'xbar'],
        [u'xba'],
        [u'x', u'xba', u'yuio'],
    )
    for candidates in similar_entries:
        self.assertTrue(fuzzy_match(haystack, candidates))
    ## the following similarity checks are currently disabled:
    #self.assertTrue(self.guess_filename.fuzzy_contains_one_of(u"Kundennummer 1234567890", [u'1234581388']))
    #self.assertTrue(self.guess_filename.fuzzy_contains_one_of(u"Rundemummer 1234567890", [u'Rundemummer 1234581388']))
    #self.assertTrue(self.guess_filename.fuzzy_contains_one_of(u"Rundemummer 1234567890", [u'Rumdemummer 1234581388']))

    ## fuzzy non-matches:
    dissimilar_entries = (
        [u'xyz'],
        [u'111'],
        [u'xby'],
        [u'x', u'yyy', u'yuio'],
    )
    for candidates in dissimilar_entries:
        self.assertFalse(fuzzy_match(haystack, candidates))
    ## the following non-match checks are currently disabled:
    #self.assertFalse(self.guess_filename.fuzzy_contains_one_of(u"Kundennummer 1234567890", [u'12345', u' 345 ', u'0987654321']))
    #self.assertFalse(self.guess_filename.fuzzy_contains_one_of(u"Kundennummer 1234567890", [u'12345']))
|
||||
|
||||
def test_has_euro_charge(self):
|
||||
|
||||
self.assertTrue(self.guess_filename.has_euro_charge(u"12,34EUR"))
|
||||
|
|
|
|||
Loading…
Reference in a new issue