mirror of
https://github.com/novoid/guess-filename.py.git
synced 2026-02-16 13:24:15 +00:00
move check for PDF file before loading PDF parsing library
This commit is contained in:
parent
0a50af236a
commit
4495f83aff
1 changed file with 7 additions and 7 deletions
|
|
@ -1,6 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
PROG_VERSION = u"Time-stamp: <2017-12-08 15:21:44 vk>"
|
||||
PROG_VERSION = u"Time-stamp: <2017-12-08 15:40:50 vk>"
|
||||
|
||||
|
||||
# TODO:
|
||||
|
|
@ -627,6 +627,12 @@ class GuessFilename(object):
|
|||
filename = os.path.join(dirname, basename)
|
||||
assert os.path.isfile(filename)
|
||||
|
||||
datetimestr, basefilename, tags, extension = self.split_filename_entities(basename)
|
||||
|
||||
if extension.lower() != 'pdf':
|
||||
logging.debug("File is not a PDF file and thus can't be parsed by this script: %s" % filename)
|
||||
return False
|
||||
|
||||
try:
|
||||
pdffile = PyPDF2.PdfFileReader(open(filename, "rb"))
|
||||
except:
|
||||
|
|
@ -645,12 +651,6 @@ class GuessFilename(object):
|
|||
logging.warning('Could read PDF file content but it is empty (skipping content analysis)')
|
||||
return False
|
||||
|
||||
datetimestr, basefilename, tags, extension = self.split_filename_entities(basename)
|
||||
|
||||
if extension.lower() != 'pdf':
|
||||
logging.debug("File is not a PDF file and thus can't be parsed by this script: %s" % filename)
|
||||
return False
|
||||
|
||||
# 2010-06-08 easybank - neue TAN-Liste -- scan private.pdf
|
||||
if self.fuzzy_contains_all_of(content, ["Transaktionsnummern (TANs)", "Ihre TAN-Liste in Verlust geraten"]) and \
|
||||
datetimestr:
|
||||
|
|
|
|||
Loading…
Reference in a new issue