move the PDF file-extension check before opening the file with the PDF parsing library

2026-02-16 13:24:15 +00:00 · 2017-12-08 15:44:05 +01:00 · 2017-12-08 15:44:05 +01:00 · 4495f83aff
commit 4495f83aff
parent 0a50af236a
1 changed file with 7 additions and 7 deletions
--- a/guessfilename.py
+++ b/guessfilename.py
@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
-PROG_VERSION = u"Time-stamp: <2017-12-08 15:21:44 vk>"
+PROG_VERSION = u"Time-stamp: <2017-12-08 15:40:50 vk>"


 # TODO:
@ -627,6 +627,12 @@ class GuessFilename(object):
        filename = os.path.join(dirname, basename)
        assert os.path.isfile(filename)

+        datetimestr, basefilename, tags, extension = self.split_filename_entities(basename)
+
+        if extension.lower() != 'pdf':
+            logging.debug("File is not a PDF file and thus can't be parsed by this script: %s" % filename)
+            return False
+
        try:
            pdffile = PyPDF2.PdfFileReader(open(filename, "rb"))
        except:
@ -645,12 +651,6 @@ class GuessFilename(object):
            logging.warning('Could read PDF file content but it is empty (skipping content analysis)')
            return False

-        datetimestr, basefilename, tags, extension = self.split_filename_entities(basename)
-
-        if extension.lower() != 'pdf':
-            logging.debug("File is not a PDF file and thus can't be parsed by this script: %s" % filename)
-            return False
-
        # 2010-06-08 easybank - neue TAN-Liste -- scan private.pdf
        if self.fuzzy_contains_all_of(content, ["Transaktionsnummern (TANs)", "Ihre TAN-Liste in Verlust geraten"]) and \
           datetimestr: