mirror of
https://github.com/novoid/guess-filename.py.git
synced 2026-02-16 05:14:16 +00:00
added error-handling for non-readable PDF files
This commit is contained in:
parent
73ffdddabb
commit
90dea54bc0
1 changed file with 5 additions and 1 deletion
|
|
@@ -374,7 +374,11 @@ class GuessFilename(object):
|
|||
filename = os.path.join(dirname, basename)
|
||||
assert os.path.isfile(filename)
|
||||
|
||||
pdffile = PyPDF2.PdfFileReader(open(filename, "rb"))
|
||||
try:
|
||||
pdffile = PyPDF2.PdfFileReader(open(filename, "rb"))
|
||||
except:
|
||||
logging.error('Could not read PDF file content. Skipping its content.')
|
||||
return False
|
||||
## use first and second page of content only:
|
||||
if pdffile.getNumPages() > 1:
|
||||
content = pdffile.pages[0].extractText() + pdffile.pages[1].extractText()
|
||||
|
|
|
|||
Loading…
Reference in a new issue