Add support for EPUB, CBZ, FB2 and (O)XPS extension to doc view

* doc/emacs/misc.texi (Document View): Add requirements for new
extensions (i.e. mutool).

* lisp/doc-view.el (doc-view): Additionally update preliminary comment
(doc-view-custom-set-epub-font-size): redraw image after setting
(doc-view-unoconv-program): Put code all on one line
(doc-view-doc-type): Update docstring.
(doc-view-kill-proc): Fix comment indentation
(doc-view-mode-p):
Add check for new extensions and alternative check for PDF
(doc-view-pdf/ps->png): Associate new extension with png converter
(doc-view-convert-current-doc): Handle new extensions like PDF's
(doc-view-set-doc-type): Set correct doc-type for new extensions.

* lisp/files.el (auto-mode-alist):
Associate new extension types with doc-view.
This commit is contained in:
Daniel Nicolai 2022-01-27 17:03:38 +01:00 committed by Lars Ingebrigtsen
parent b9239954b6
commit d633db5189
3 changed files with 113 additions and 63 deletions

View file

@ -455,20 +455,27 @@ servers the user has connected to. If this variable is @code{t},
@cindex PostScript file
@cindex OpenDocument file
@cindex Microsoft Office file
@cindex EPUB file
@cindex CBZ file
@cindex FB2 file
@cindex XPS file
@cindex OXPS file
@cindex DocView mode
@cindex mode, DocView
@cindex document viewer (DocView)
@findex doc-view-mode
DocView mode is a major mode for viewing DVI, PostScript (PS), PDF,
OpenDocument, and Microsoft Office documents. It provides features
such as slicing, zooming, and searching inside documents. It works by
converting the document to a set of images using the @command{gs}
(GhostScript) or @command{mudraw}/@command{pdfdraw} (MuPDF) commands
and other external tools @footnote{For PostScript files, GhostScript
is a hard requirement. For DVI files, @code{dvipdf} or @code{dvipdfm}
is needed. For OpenDocument and Microsoft Office documents, the
@code{unoconv} tool is needed.}, and displaying those images.
OpenDocument, Microsoft Office, EPUB, CBZ, FB2, XPS and OXPS
documents. It provides features such as slicing, zooming, and
searching inside documents. It works by converting the document to a
set of images using the @command{gs} (GhostScript) or
@command{pdfdraw}/@command{mutool draw} (MuPDF) commands and other
external tools @footnote{PostScript files require GhostScript, DVI
files require @code{dvipdf} or @code{dvipdfm}, OpenDocument and
Microsoft Office documents require the @code{unoconv} tool, and EPUB,
CBZ, FB2, XPS and OXPS files require @code{mutool} to be available.},
and displaying those images.
@findex doc-view-toggle-display
@findex doc-view-minor-mode

View file

@ -3,7 +3,7 @@
;; Copyright (C) 2007-2022 Free Software Foundation, Inc.
;;
;; Author: Tassilo Horn <tsdh@gnu.org>
;; Keywords: files, pdf, ps, dvi
;; Keywords: files, pdf, ps, dvi, djvu, epub, cbz, fb2, xps, openxps
;; This file is part of GNU Emacs.
@ -25,17 +25,19 @@
;; Viewing PS/PDF/DVI files requires Ghostscript, `dvipdf' (comes with
;; Ghostscript) or `dvipdfm' (comes with teTeX or TeXLive) and
;; `pdftotext', which comes with xpdf (https://www.foolabs.com/xpdf/)
;; or poppler (https://poppler.freedesktop.org/).
;; Djvu documents require `ddjvu' (from DjVuLibre).
;; ODF files require `soffice' (from LibreOffice).
;; or poppler (https://poppler.freedesktop.org/). EPUB, CBZ, FB2, XPS
;; and OXPS documents require `mutool' which comes with mupdf
;; (https://mupdf.com/index.html). Djvu documents require `ddjvu'
;; (from DjVuLibre). ODF files require `soffice' (from LibreOffice).
;;; Commentary:
;; DocView is a document viewer for Emacs. It converts a number of
;; document formats (including PDF, PS, DVI, Djvu and ODF files) to a
;; set of PNG files, one PNG for each page, and displays the PNG
;; images inside an Emacs buffer. This buffer uses `doc-view-mode'
;; which provides convenient key bindings for browsing the document.
;; document formats (including PDF, PS, DVI, Djvu, ODF, EPUB, CBZ,
;; FB2, XPS and OXPS files) to a set of PNG (or TIFF for djvu) files,
;; one image for each page, and displays the images inside an Emacs
;; buffer. This buffer uses `doc-view-mode' which provides convenient
;; key bindings for browsing the document.
;;
;; To use it simply open a document file with
;;
@ -147,7 +149,10 @@
;;;; Customization Options
(defgroup doc-view nil
"In-buffer viewer for PDF, PostScript, DVI, and DJVU files."
"In-buffer document viewer.
The viewer handles PDF, PostScript, DVI, DJVU, ODF, EPUB, CBZ,
FB2, XPS and OXPS files, if the appropriate converter programs
are available (see Info node `(emacs)Document View')"
:link '(function-link doc-view)
:version "22.2"
:group 'applications
@ -221,6 +226,11 @@
Higher values result in larger images."
:type 'number)
(defcustom doc-view-epub-font-size nil
"Font size in points for EPUB layout."
:type 'integer
:set #'doc-view-custom-set-epub-font-size)
(defcustom doc-view-scale-internally t
"Whether we should try to rescale images ourselves.
If nil, the document is re-rendered every time the scaling factor is modified.
@ -256,9 +266,7 @@ If this and `doc-view-dvipdfm-program' are set,
`doc-view-dvipdf-program' will be preferred."
:type 'file)
(define-obsolete-variable-alias 'doc-view-unoconv-program
'doc-view-odf->pdf-converter-program
"24.4")
(define-obsolete-variable-alias 'doc-view-unoconv-program 'doc-view-odf->pdf-converter-program "24.4")
(defcustom doc-view-odf->pdf-converter-program
(cond
@ -382,7 +390,8 @@ the (uncompressed, extracted) file residing in
(defvar doc-view-doc-type nil
"The type of document in the current buffer.
Can be `dvi', `pdf', `ps', `djvu' or `odf'.")
Can be `dvi', `pdf', `ps', `djvu', `odf', 'epub', `cbz', `fb2',
`'xps' or `oxps'.")
(defvar doc-view-single-page-converter-function nil
"Function to call to convert a single page of the document to a bitmap file.
@ -464,17 +473,17 @@ Typically \"page-%s.png\".")
;; It's normal for this operation to result in a very large undo entry.
(setq-local undo-outer-limit (* 2 (buffer-size))))
(cl-labels ((revert ()
(let ((revert-buffer-preserve-modes t))
(apply orig-fun args)
;; Update the cached version of the pdf file,
;; too. This is the one that's used when
;; rendering (bug#26996).
(unless (equal buffer-file-name
doc-view--buffer-file-name)
;; FIXME: Lars says he needed to recreate
;; the dir, we should figure out why.
(doc-view-make-safe-dir doc-view-cache-directory)
(write-region nil nil doc-view--buffer-file-name)))))
(let ((revert-buffer-preserve-modes t))
(apply orig-fun args)
;; Update the cached version of the pdf file,
;; too. This is the one that's used when
;; rendering (bug#26996).
(unless (equal buffer-file-name
doc-view--buffer-file-name)
;; FIXME: Lars says he needed to recreate
;; the dir, we should figure out why.
(doc-view-make-safe-dir doc-view-cache-directory)
(write-region nil nil doc-view--buffer-file-name)))))
(if (and (eq 'pdf doc-view-doc-type)
(executable-find "pdfinfo"))
;; We don't want to revert if the PDF file is corrupted which
@ -586,6 +595,15 @@ Typically \"page-%s.png\".")
(defmacro doc-view-current-image () '(image-mode-window-get 'image))
(defmacro doc-view-current-slice () '(image-mode-window-get 'slice))
(defun doc-view-custom-set-epub-font-size (option-name new-value)
(set-default option-name new-value)
(dolist (x (buffer-list))
(with-current-buffer x
(when (eq doc-view-doc-type 'epub)
(delete-directory doc-view--current-cache-dir t)
(doc-view-initiate-display)
(doc-view-goto-page (doc-view-current-page))))))
(defun doc-view-last-page-number ()
(length doc-view--current-files))
@ -738,7 +756,7 @@ at the top edge of the page moves to the previous page."
(interactive)
(while (consp doc-view--current-converter-processes)
(ignore-errors ;; Some entries might not be processes, and maybe
;; some are dead already?
; some are dead already?
(kill-process (pop doc-view--current-converter-processes))))
(when doc-view--current-timer
(cancel-timer doc-view--current-timer)
@ -799,8 +817,8 @@ It's a subdirectory of `doc-view-cache-directory'."
;;;###autoload
(defun doc-view-mode-p (type)
"Return non-nil if document type TYPE is available for `doc-view'.
Document types are symbols like `dvi', `ps', `pdf', or `odf' (any
OpenDocument format)."
Document types are symbols like `dvi', `ps', `pdf', `epub',
`cbz', `fb2', `xps', `oxps', or`odf' (any OpenDocument format)."
(and (display-graphic-p)
(image-type-available-p 'png)
(cond
@ -811,16 +829,22 @@ OpenDocument format)."
(and doc-view-dvipdfm-program
(executable-find doc-view-dvipdfm-program)))))
((memq type '(postscript ps eps pdf))
(or (and doc-view-ghostscript-program
(or (and doc-view-ghostscript-program
(executable-find doc-view-ghostscript-program))
(and doc-view-pdfdraw-program
(executable-find doc-view-pdfdraw-program))))
;; for pdf also check for `doc-view-pdfdraw-program'
(when (eq type 'pdf)
(and doc-view-pdfdraw-program
(executable-find doc-view-pdfdraw-program)))))
((eq type 'odf)
(and doc-view-odf->pdf-converter-program
(executable-find doc-view-odf->pdf-converter-program)
(doc-view-mode-p 'pdf)))
((eq type 'djvu)
(executable-find "ddjvu"))
((memq type '(epub cbz fb2 xps oxps))
;; first check if `doc-view-pdfdraw-program' is set to mutool
(and (string= doc-view-pdfdraw-program "mutool")
(executable-find "mutool")))
(t ;; unknown image type
nil))))
@ -1053,7 +1077,7 @@ Should be invoked when the cached images aren't up-to-date."
;; some file-name-handler-managed dir, for example).
(let* ((default-directory (or (unhandled-file-name-directory
default-directory)
(expand-file-name "~/")))
(expand-file-name "~/")))
(proc (apply #'start-process name doc-view-conversion-buffer
program args)))
(push proc doc-view--current-converter-processes)
@ -1139,14 +1163,17 @@ The test is performed using `doc-view-pdfdraw-program'."
(search-forward "error: cannot authenticate password" nil t)))
(defun doc-view-pdf->png-converter-mupdf (pdf png page callback)
(let ((pdf-passwd (if (doc-view-pdf-password-protected-pdfdraw-p pdf)
(read-passwd "Enter password for PDF file: "))))
(let* ((pdf-passwd (if (doc-view-pdf-password-protected-pdfdraw-p pdf)
(read-passwd "Enter password for PDF file: ")))
(options `(,(concat "-o" png)
,(format "-r%d" (round doc-view-resolution))
,@(if pdf-passwd `("-p" ,pdf-passwd)))))
(when (and (eq doc-view-doc-type 'epub) doc-view-epub-font-size)
(setq options (append options (list (format "-S%s" doc-view-epub-font-size)))))
(doc-view-start-process
"pdf->png" doc-view-pdfdraw-program
`(,@(doc-view-pdfdraw-program-subcommand)
,(concat "-o" png)
,(format "-r%d" (round doc-view-resolution))
,@(if pdf-passwd `("-p" ,pdf-passwd))
,@options
,pdf
,@(if page `(,(format "%d" page))))
callback)))
@ -1227,20 +1254,20 @@ Start by converting PAGES, and then the rest."
(let ((rest (cdr pages)))
(funcall doc-view-single-page-converter-function
pdf (format png (car pages)) (car pages)
(lambda ()
(if rest
(doc-view-document->bitmap pdf png rest)
;; Yippie, the important pages are done, update the display.
(clear-image-cache)
;; For the windows that have a message (like "Welcome to
;; DocView") display property, clearing the image cache is
;; not sufficient.
(dolist (win (get-buffer-window-list (current-buffer) nil 'visible))
(with-selected-window win
(when (stringp (overlay-get (doc-view-current-overlay) 'display))
(doc-view-goto-page (doc-view-current-page)))))
;; Convert the rest of the pages.
(doc-view-pdf/ps->png pdf png)))))))
(lambda ()
(if rest
(doc-view-document->bitmap pdf png rest)
;; Yippie, the important pages are done, update the display.
(clear-image-cache)
;; For the windows that have a message (like "Welcome to
;; DocView") display property, clearing the image cache is
;; not sufficient.
(dolist (win (get-buffer-window-list (current-buffer) nil 'visible))
(with-selected-window win
(when (stringp (overlay-get (doc-view-current-overlay) 'display))
(doc-view-goto-page (doc-view-current-page)))))
;; Convert the rest of the pages.
(doc-view-pdf/ps->png pdf png)))))))
(defun doc-view-pdf->txt (pdf txt callback)
"Convert PDF to TXT asynchronously and call CALLBACK when finished."
@ -1337,7 +1364,9 @@ Those files are saved in the directory given by the function
;; Rename to doc.pdf
(rename-file opdf pdf)
(doc-view-pdf/ps->png pdf png-file)))))
((or 'pdf 'djvu)
;; The doc-view-mode-p check ensures that epub, cbz, fb2 and
;; (o)xps are handled with mutool
((or 'pdf 'djvu 'epub 'cbz 'fb2 'xps 'oxps)
(let ((pages (doc-view-active-pages)))
;; Convert doc to bitmap images starting with the active pages.
(doc-view-document->bitmap doc-view--buffer-file-name png-file pages)))
@ -1432,7 +1461,7 @@ dragging it to its bottom-right corner. See also
(defun doc-view-guess-paper-size (iw ih)
"Guess the paper size according to the aspect ratio."
(cl-labels ((div (x y)
(round (/ (* 100.0 x) y))))
(round (/ (* 100.0 x) y))))
(let ((ar (div iw ih))
(al (mapcar (lambda (l)
(list (div (nth 1 l) (nth 2 l)) (car l)))
@ -1869,6 +1898,8 @@ If BACKWARD is non-nil, jump to the previous match."
("dvi" dvi)
;; PDF
("pdf" pdf) ("epdf" pdf)
;; EPUB
("epub" epub)
;; PostScript
("ps" ps) ("eps" ps)
;; DjVu
@ -1880,7 +1911,13 @@ If BACKWARD is non-nil, jump to the previous match."
;; Microsoft Office formats (also handled by the odf
;; conversion chain).
("doc" odf) ("docx" odf) ("xls" odf) ("xlsx" odf)
("ppt" odf) ("pps" odf) ("pptx" odf) ("rtf" odf))
("ppt" odf) ("pps" odf) ("pptx" odf) ("rtf" odf)
;; CBZ
("cbz" cbz)
;; FB2
("fb2" fb2)
;; (Open)XPS
("xps" xps) ("oxps" oxps))
t))))
(content-types
(save-excursion
@ -1889,7 +1926,13 @@ If BACKWARD is non-nil, jump to the previous match."
((looking-at "%!") '(ps))
((looking-at "%PDF") '(pdf))
((looking-at "\367\002") '(dvi))
((looking-at "AT&TFORM") '(djvu))))))
((looking-at "AT&TFORM") '(djvu))
;; The following pattern actually is for recognizing
;; zip-archives, so that this same association is used for
;; cbz files. This is fine, as cbz files should be handled
;; like epub anyway.
((looking-at "PK") '(epub))
))))
(setq-local
doc-view-doc-type
(car (or (nreverse (seq-intersection name-types content-types #'eq))

View file

@ -2928,7 +2928,7 @@ ARC\\|ZIP\\|LZH\\|LHA\\|ZOO\\|[JEW]AR\\|XPI\\|RAR\\|CBR\\|7Z\\|SQUASHFS\\)\\'" .
("\\.\\(diffs?\\|patch\\|rej\\)\\'" . diff-mode)
("\\.\\(dif\\|pat\\)\\'" . diff-mode) ; for MS-DOS
("\\.[eE]?[pP][sS]\\'" . ps-mode)
("\\.\\(?:PDF\\|DVI\\|OD[FGPST]\\|DOCX\\|XLSX?\\|PPTX?\\|pdf\\|djvu\\|dvi\\|od[fgpst]\\|docx\\|xlsx?\\|pptx?\\)\\'" . doc-view-mode-maybe)
("\\.\\(?:PDF\\|EPUB\\|CBZ\\|FB2\\|O?XPS\\|DVI\\|OD[FGPST]\\|DOCX\\|XLSX?\\|PPTX?\\|pdf\\|epub\\|cbz\\|fb2\\|o?xps\\|djvu\\|dvi\\|od[fgpst]\\|docx\\|xlsx?\\|pptx?\\)\\'" . doc-view-mode-maybe)
("configure\\.\\(ac\\|in\\)\\'" . autoconf-mode)
("\\.s\\(v\\|iv\\|ieve\\)\\'" . sieve-mode)
("BROWSE\\'" . ebrowse-tree-mode)