PDF exception handling now for all exceptions

2026-02-16 05:14:16 +00:00 · 2017-12-24 16:52:48 +01:00 · 2017-12-24 16:52:48 +01:00 · a60331bd22
commit a60331bd22
parent a696c07310
1 changed file with 8 additions and 8 deletions
--- a/guessfilename.py
+++ b/guessfilename.py
@@ -651,17 +651,17 @@ class GuessFilename(object):

        try:
            pdffile = PyPDF2.PdfFileReader(open(filename, "rb"))
+            # use first and second page of content only:
+            if pdffile.getNumPages() > 1:
+                content = pdffile.pages[0].extractText() + pdffile.pages[1].extractText()
+            elif pdffile.getNumPages() == 1:
+                content = pdffile.pages[0].extractText()
+            else:
+                logging.error('Could not determine number of pages of PDF content! (skipping content analysis)')
+                return False
        except:
            logging.error('Could not read PDF file content. Skipping its content.')
            return False
-        # use first and second page of content only:
-        if pdffile.getNumPages() > 1:
-            content = pdffile.pages[0].extractText() + pdffile.pages[1].extractText()
-        elif pdffile.getNumPages() == 1:
-            content = pdffile.pages[0].extractText()
-        else:
-            logging.error('Could not determine number of pages of PDF content! (skipping content analysis)')
-            return False

        if len(content) == 0:
            logging.warning('Could read PDF file content but it is empty (skipping content analysis)')