From a60331bd22424c0e1529d1e470355a875fb2c0e7 Mon Sep 17 00:00:00 2001 From: Karl Voit Date: Sun, 24 Dec 2017 16:52:48 +0100 Subject: [PATCH] PDF exception handling now for all exceptions --- guessfilename.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/guessfilename.py b/guessfilename.py index 2227105..e460b30 100755 --- a/guessfilename.py +++ b/guessfilename.py @@ -651,17 +651,17 @@ class GuessFilename(object): try: pdffile = PyPDF2.PdfFileReader(open(filename, "rb")) + # use first and second page of content only: + if pdffile.getNumPages() > 1: + content = pdffile.pages[0].extractText() + pdffile.pages[1].extractText() + elif pdffile.getNumPages() == 1: + content = pdffile.pages[0].extractText() + else: + logging.error('Could not determine number of pages of PDF content! (skipping content analysis)') + return False except: logging.error('Could not read PDF file content. Skipping its content.') return False - # use first and second page of content only: - if pdffile.getNumPages() > 1: - content = pdffile.pages[0].extractText() + pdffile.pages[1].extractText() - elif pdffile.getNumPages() == 1: - content = pdffile.pages[0].extractText() - else: - logging.error('Could not determine number of pages of PDF content! (skipping content analysis)') - return False if len(content) == 0: logging.warning('Could read PDF file content but it is empty (skipping content analysis)')