org.el: Fix percent substitutions in `org-open-file'

* lisp/org.el (org--open-file-format-command): New function with better
coverage of mailcap RFC 1524 syntax.  Do not replace percent character
in file name or link component, fix substitution of multiple regular
expression groups matched in the link target.
(org-open-file): Use `org--open-file-format-command' instead of inline
code.
* testing/lisp/test-org.el (org-test/org--open-file-format-command):
Tests for `org--open-file-format-command'.

The primary goal of moving code outside of `org-open-file' function is to
make it testable.

It should fix the following issues:
- Maxim Nikulin. greedy substitution in org-open-file.
  Wed, 20 Jan 2021 23:08:35 +0700.
  https://list.orgmode.org/ru9ki4$t5e$1@ciao.gmane.io
- Rodrigo Morales. Org mode links: Open a PDF file at a given page
  and highlight a given string. Tue, 02 Mar 2021 15:07:32 -0500.
  https://list.orgmode.org/87lfb5pbej.fsf@gmail.com
This commit is contained in:
Max Nikulin 2022-08-27 23:51:24 +07:00 committed by Ihor Radchenko
parent 7811fc5925
commit ac2d0a249e
No known key found for this signature in database
GPG Key ID: 6470762A7DA11D8B
2 changed files with 240 additions and 21 deletions

View File

@ -1299,6 +1299,16 @@ Possible values for the file identifier are:
\"evince -p %1 %s\")
to open [[file:document.pdf::5]] with evince at page 5.
Likely, you will need more entries: without page
number; with search pattern; with cross-reference
anchor; some combination of options. Consider simple
pattern here and a Lisp function to determine command
line arguments instead. Passing argument list to
`call-process' or `make-process' directly allows to
avoid treating some character in peculiar file names
as shell specialls causing executing part of file
name as a subcommand.
`directory' Matches a directory
`remote' Matches a remote file, accessible through tramp or efs.
Remote files most likely should be visited through Emacs
@ -8015,6 +8025,74 @@ opened in Emacs."
(when add-auto-mode
(mapcar (lambda (x) (cons (car x) 'emacs)) auto-mode-alist))))
(defun org--open-file-format-command
(mailcap-command file link match-data)
"Format MAILCAP-COMMAND to launch viewer for the FILE.
MAILCAP-COMMAND may be an entry from the `org-file-apps' list or viewer
field from mailcap file loaded to `mailcap-mime-data'. See \"RFC
1524. A User Agent Configuration Mechanism For Multimedia Mail Format
Information\" (URL `https://www.rfc-editor.org/rfc/rfc1524.html') for
details, man page `mailcap(5)' for brief summary, and Info node
`(emacs-mime) mailcap' for specific related to Emacs. Only a part of
mailcap specification is supported.
The following substitutions are interpolated in the MAILCAP-COMMAND
string:
- \"%s\" to FILE name passed through
`convert-standard-filename', so it must be absolute path.
- \"%1\" to \"%9\" groups from MATCH-DATA found in the LINK string by
the regular expression in the key part of the `org-file-apps' entry.
(performed by caller). Not recommended, consider a lisp function
instead of a shell command. For example, the following link in an
Org file
<file:///usr/share/doc/bash/bashref.pdf::#Redirections::allocate a file>
may be handled by an `org-file-apps' entry like
(\"\\\\.pdf\\\\(?:\\\\.[gx]z\\\\|\\\\.bz2\\\\)?::\\\\(#[^:]+\\\\)::\\\\(.+\\\\)\\\\\\='\"
. \"okular --find %2 %s%1\")
Use backslash \"\\\" to quote percent \"%\" or any other character
including backslash itself.
In addition, each argument is passed through `shell-quote-argument',
so quotes around substitutions should not be used. For compliance
with mailcap files shipped e.g. in Debian GNU/Linux, single or double
quotes around substitutions are stripped. It deviates from mailcap
specification that requires file name to be safe for shell and for the
application."
(let ((spec (list (cons ?s (convert-standard-filename file))))
(ngroups (min 9 (- (/ (length match-data) 2) 1))))
(when (> ngroups 0)
(set-match-data match-data)
(dolist (i (number-sequence 1 ngroups))
(push (cons (+ ?0 i) (match-string-no-properties i link)) spec)))
(replace-regexp-in-string
(rx (or (and "\\" (or (group anything) string-end))
(and (optional (group (any "'\"")))
"%"
(or (group anything) string-end)
(optional (group (backref 2))))))
(lambda (fmt)
(let* ((backslash (match-string-no-properties 1 fmt))
(key (match-string 3 fmt))
(value (and key (alist-get (string-to-char key) spec))))
(cond
(backslash)
(value (let ((quot (match-string 2 fmt))
(subst (shell-quote-argument value)))
;; Remove quotes around the file name - we use
;; `shell-quote-argument'.
(if (match-string 4 fmt)
subst
(concat quot subst))))
(t (error "Invalid format `%s'" fmt)))))
mailcap-command nil 'literal)))
;;;###autoload
(defun org-open-file (path &optional in-emacs line search)
"Open the file at PATH.
@ -8112,27 +8190,8 @@ If the file does not exist, throw an error."
(not org-open-non-existing-files))
(user-error "No such file: %s" file))
(cond
((and (stringp cmd) (not (string-match "^\\s-*$" cmd)))
;; Remove quotes around the file name - we'll use shell-quote-argument.
(while (string-match "['\"]%s['\"]" cmd)
(setq cmd (replace-match "%s" t t cmd)))
(setq cmd (replace-regexp-in-string
"%s"
(shell-quote-argument (convert-standard-filename file))
cmd
nil t))
;; Replace "%1", "%2" etc. in command with group matches from regex
(save-match-data
(let ((match-index 1)
(number-of-groups (- (/ (length link-match-data) 2) 1)))
(set-match-data link-match-data)
(while (<= match-index number-of-groups)
(let ((regex (concat "%" (number-to-string match-index)))
(replace-with (match-string match-index dlink)))
(while (string-match regex cmd)
(setq cmd (replace-match replace-with t t cmd))))
(setq match-index (+ match-index 1)))))
((org-string-nw-p cmd)
(setq cmd (org--open-file-format-command cmd file dlink link-match-data))
(save-window-excursion
(message "Running %s...done" cmd)

View File

@ -8421,6 +8421,166 @@ two
(call-interactively #'org-paste-subtree)
(buffer-string)))))
(ert-deftest test-org/org--open-file-format-command ()
"Test `org--open-file-format-command' helper for `org-open-file'."
(let ((system-type 'gnu/linux)) ; Fix behavior of `shell-quote-argument'.
;; No additional groups in `org-file-apps' key.
(let ((file "/file.pdf")
(pattern "\\.pdf\\'"))
(should
(equal "simple /file.pdf"
(and (string-match pattern file)
(org--open-file-format-command
"simple %s" file file (match-data)))))
(should
(equal "single-quotes /file.pdf"
(and (string-match pattern file)
(org--open-file-format-command
"single-quotes '%s'" file file (match-data)))))
(should
(equal "double-quotes /file.pdf"
(and (string-match pattern file)
(org--open-file-format-command
"double-quotes \"%s\"" file file (match-data)))))
(should
(equal "quotes 'mismatch \"/file.pdf'"
(and (string-match pattern file)
(org--open-file-format-command
"quotes 'mismatch \"%s'" file file (match-data)))))
(should
(equal "no subst"
(and (string-match pattern file)
(org--open-file-format-command
"no subst" file file (match-data)))))
(should
(equal "% literal percent 100% %s"
(and (string-match pattern file)
(org--open-file-format-command
"\\% literal percent 100\\% \\%s" file file (match-data)))))
(should
(equal "escape \"/file.pdf\" \\ more"
(and (string-match pattern file)
(org--open-file-format-command
;; Second quote is not escaped.
"escape \\\"%s\" \\\\ more" file file (match-data)))))
(should
(equal "/file.pdf file at start"
(and (string-match pattern file)
(org--open-file-format-command
"%s file at start" file file (match-data)))))
(should
(equal "backslash-newline\n/file.pdf"
(and (string-match pattern file)
(org--open-file-format-command
"backslash-newline\\\n%s" file file (match-data))))))
;; Anchors within target file.
(let ((file "/page-search.pdf")
(link "/page-search.pdf::10::some words")
(pattern "\\.pdf::\\([0-9]+\\)::\\(.*\\)\\'"))
(should
(equal "zathura --page 10 --find some\\ words /page-search.pdf"
(and (string-match pattern link)
(org--open-file-format-command
"zathura --page '%1' --find %2 \"%s\"" file link (match-data)))))
;; Unused %2.
(should
(equal "firefox file:///page-search.pdf\\#page=10"
(and (string-match pattern link)
(org--open-file-format-command
"firefox file://%s\\\\#page=%1" file link (match-data)))))
(should
(equal "adjucent-subst /page-search.pdfsome\\ words10some\\ words"
(and (string-match pattern link)
(org--open-file-format-command
"adjucent-subst %s%2'%1'%2" file link (match-data))))))
;; No more than 9 substitutions are supported.
(let ((file "/many.pdf")
(link "/many.pdf::one:2:3:4:5:6:7:8:9:a:b:c")
(pattern (concat "\\.pdf:"
(mapconcat (lambda (_) ":\\([^:]+\\)")
(number-sequence 1 12)
"")
"\\'")))
(should
(equal "overflow /many.pdf::one:2:3:4:5:6:7:8:9:one0:one1:one2"
(and (string-match pattern link)
(org--open-file-format-command
"overflow %s::%1:%2:%3:%4:%5:%6:%7:%8:%9:%10:%11:%12"
file link (match-data))))))
;; Percent character in link fields does not cause any problem.
(let ((file "/file-%2.pdf")
(link "/file-%2.pdf::anchor-%3::search %1")
(pattern "\\.pdf::\\([^:]+\\)::\\(.+\\)\\'"))
(should
(equal "percents --find search\\ \\%1 file:///file-\\%2.pdf\\#anchor-\\%3"
(and (string-match pattern link)
(org--open-file-format-command
"percents --find %2 file://%s\\\\#%1"
file link (match-data))))))
;; Errors.
(let ((file "/error.pdf")
(pattern "\\.pdf\\'"))
(let* ((err (should-error
(and (string-match pattern file)
(org--open-file-format-command
"trailing-percent %s %" file file (match-data)))
:type 'error))
(err-text (cadr err)))
(should-not (unless (and (stringp err-text)
(string-match-p "\\`Invalid format .*%" err-text))
err)))
(let* ((err (should-error
(and (string-match pattern file)
(org--open-file-format-command
"trailing-backslash %s \\" file file (match-data)))
:type 'error))
(err-text (cadr err)))
(should-not (unless (and (stringp err-text)
(string-match-p "\\`Invalid format .*\\\\" err-text))
err)))
(let* ((err (should-error
(and (string-match pattern file)
(org--open-file-format-command
"percent-newline %\n%s" file file (match-data)))
:type 'error))
(err-text (cadr err)))
(should-not (unless (and (stringp err-text)
(string-match-p "\\`Invalid format .*%\n" err-text))
err)))
;; Mailcap escape for "%" is "\%", not "%%".
(let* ((err (should-error
(and (string-match pattern file)
(org--open-file-format-command
"percent-percent %s%%" file file (match-data)))
:type 'error))
(err-text (cadr err)))
(should-not (unless (and (stringp err-text)
(string-match-p "\\`Invalid format .*%%" err-text))
err)))
;; Mailcap allows "%t" for MIME type, but Org has no such information.
(let* ((err (should-error
(and (string-match pattern file)
(org--open-file-format-command
"percent-t-unsupported --type '%t' %s" file file (match-data)))
:type 'error))
(err-text (cadr err)))
(should-not (unless (and (stringp err-text)
(string-match-p "\\`Invalid format .*%t" err-text))
err))))
;; Optional regular expression groups have no point in `org-file-apps' patterns.
(let* ((file "/error.pdf")
(link "/error.pdf::1")
(pattern "\\.pdf::\\([^:]+\\)\\(?:::\\(.+\\)\\)?\\'")
(err (should-error
(and (string-match pattern link)
(org--open-file-format-command
"no-such-match --search %2 %s" file link (match-data)))
:type 'error))
(err-text (cadr err)))
(should-not (unless (and (stringp err-text)
(string-match-p "\\`Invalid format.*%2" err-text))
err)))))
(provide 'test-org)
;;; test-org.el ends here