forked from mirrors/org-mode
Work around regexp size limitation for large number of link targets
* lisp/ol.el (org-target-link-regexp-limit): New constant defining maximum regexp limit where `org-target-link-regexp' is still safe to use without triggering "Regexp too long" error. (org-target-link-regexps): New variable holding a series of shorter regexps to be used instead of too long single `org-target-link-regexp'. (org--re-list-search-forward): New function like `re-search-forward', but accepting a list of regexps. (org--re-list-looking-at): New function like `looking-at', but accepting a list of regexps. (org-update-radio-target-regexp): When `org-target-link-regexp' is too long, set `org-target-link-regexps', partitioning the link target list into smaller regexps. * lisp/org-element.el (org-element-link-parser): (org-element--object-lex): * lisp/org.el (org-activate-target-links): Use `org--re-list-search-forward' and `org--re-list-looking-at' when `org-target-link-regexps' is non-nil. * testing/lisp/test-org-element.el (test-org-element/link-parser): Add tests. Reported-by: Rudolf Adamkovič <salutis@me.com> Link: https://list.orgmode.org/orgmode/m2lenax5m6.fsf@me.com/
This commit is contained in:
parent
dd4d05a159
commit
341a01a07d
71
lisp/ol.el
71
lisp/ol.el
|
@ -52,6 +52,7 @@
|
|||
(declare-function org-do-occur "org" (regexp &optional cleanup))
|
||||
(declare-function org-element-at-point "org-element" (&optional pom cached-only))
|
||||
(declare-function org-element-cache-refresh "org-element" (pos))
|
||||
(declare-function org-element-cache-reset "org-element" (&optional all no-persistence))
|
||||
(declare-function org-element-context "org-element" (&optional element))
|
||||
(declare-function org-element-lineage "org-element-ast" (datum &optional types with-self))
|
||||
(declare-function org-element-link-parser "org-element" ())
|
||||
|
@ -532,6 +533,16 @@ links more efficient."
|
|||
|
||||
(defvar-local org-target-link-regexp nil
|
||||
"Regular expression matching radio targets in plain text.")
|
||||
(defconst org-target-link-regexp-limit (ash 2 10)
|
||||
"Maximum allowed length of `org-target-link-regexp', in characters.
|
||||
The value is (ash 2 10), i.e. 2048.
The number should generally be ~order of magnitude smaller than
|
||||
MAX_BUF_SIZE in src/regex-emacs.c.  The number in regex-emacs.c is
|
||||
for processed regexp, which appears to be larger compared to the
|
||||
original string length.")
|
||||
(defvar-local org-target-link-regexps nil
|
||||
"List of regular expressions matching radio targets in plain text.
|
||||
This list is non-nil, when a single regexp would be too long to match
|
||||
all the possible targets, exceeding Emacs' regexp length limit.")
|
||||
|
||||
(defvar org-link-types-re nil
|
||||
"Matches a link that has a url-like prefix like \"http:\".")
|
||||
|
@ -2170,6 +2181,34 @@ This command can be called in any mode to insert a link in Org syntax."
|
|||
(org-load-modules-maybe)
|
||||
(org-run-like-in-org-mode 'org-insert-link))
|
||||
|
||||
(defun org--re-list-search-forward (regexp-list &optional bound noerror count)
  "Like `re-search-forward', but REGEXP-LIST is a list of regexps.
BOUND, NOERROR, and COUNT are passed to `re-search-forward'.

Every regexp in REGEXP-LIST is searched from point independently (COUNT
is applied to each regexp separately).  Among the successful searches,
the match that begins earliest in the buffer is selected; when several
matches begin at the same position, the regexp appearing first in
REGEXP-LIST takes precedence, mimicking the behavior of a single
regexp made of the alternatives joined with \"\\\\|\".

On success, set the match data to the selected match, move point to its
end and return that position.  When nothing matches, return nil if
NOERROR is t; otherwise, defer to `re-search-forward' on the first
regexp so that the usual failure behavior (signaling an error or
moving to BOUND) applies."
  (let ((best-beg nil)
        (best-end nil)
        (best-data nil))
    (dolist (regexp regexp-list)
      ;; Probe each regexp without moving point, so that all the
      ;; searches start from the same position.
      (save-excursion
        (when (re-search-forward regexp bound t count)
          ;; Compare match beginnings, not match ends: a short match
          ;; starting later must not win over a longer match that
          ;; starts earlier.  Strict comparison keeps list order as
          ;; the tie-breaker.
          (when (or (not best-beg) (< (match-beginning 0) best-beg))
            (setq best-beg (match-beginning 0)
                  best-end (match-end 0)
                  best-data (match-data))))))
    (cond
     (best-beg
      (set-match-data best-data)
      (goto-char best-end))
     ((eq noerror t) nil)
     ;; No regexp matched: reproduce the failure behavior of
     ;; `re-search-forward' for the given NOERROR.
     (t (re-search-forward (car regexp-list) bound noerror count)))))
|
||||
|
||||
(defun org--re-list-looking-at (regexp-list &optional inhibit-modify)
  "Like `looking-at', but REGEXP-LIST is a list of regexps.
Try the regexps of REGEXP-LIST in order and return t as soon as one of
them matches the text after point.  Return nil when none matches.
When INHIBIT-MODIFY is non-nil, do not modify the match data."
  (catch :found
    (dolist (regexp regexp-list)
      ;; The INHIBIT-MODIFY argument of `looking-at' is only available
      ;; in Emacs 29 and later.  `looking-at-p' provides the same
      ;; behavior and also works in older Emacs versions.
      (when (if inhibit-modify
                (looking-at-p regexp)
              (looking-at regexp))
        (throw :found t)))
    nil))
|
||||
|
||||
;;;###autoload
|
||||
(defun org-update-radio-target-regexp ()
|
||||
"Find all radio targets in this file and update the regular expression.
|
||||
|
@ -2207,6 +2246,30 @@ Also refresh fontification if needed."
|
|||
targets
|
||||
"\\|")
|
||||
after-re)))
|
||||
(setq org-target-link-regexps nil)
|
||||
(let (current-length sub-targets)
|
||||
(when (<= org-target-link-regexp-limit (length org-target-link-regexp))
|
||||
(while (or targets sub-targets)
|
||||
(when (and sub-targets
|
||||
(or (not targets)
|
||||
(>= (+ current-length (length (car targets)))
|
||||
org-target-link-regexp-limit)))
|
||||
(push (concat before-re
|
||||
(mapconcat
|
||||
(lambda (x)
|
||||
(replace-regexp-in-string
|
||||
" +" "\\s-+" (regexp-quote x) t t))
|
||||
(nreverse sub-targets)
|
||||
"\\|")
|
||||
after-re)
|
||||
org-target-link-regexps)
|
||||
(setq current-length nil
|
||||
sub-targets nil))
|
||||
(unless current-length
|
||||
(setq current-length (+ (length before-re) (length after-re))))
|
||||
(when targets (push (pop targets) sub-targets))
|
||||
(cl-incf current-length (length (car sub-targets))))
|
||||
(setq org-target-link-regexps (nreverse org-target-link-regexps))))
|
||||
(unless (equal old-regexp org-target-link-regexp)
|
||||
;; Clean-up cache.
|
||||
(let ((regexp (cond ((not old-regexp) org-target-link-regexp)
|
||||
|
@ -2222,9 +2285,11 @@ Also refresh fontification if needed."
|
|||
after-re)))))
|
||||
(when (and (featurep 'org-element)
|
||||
(not (bound-and-true-p org-mode-loading)))
|
||||
(org-with-point-at 1
|
||||
(while (re-search-forward regexp nil t)
|
||||
(org-element-cache-refresh (match-beginning 1))))))
|
||||
(if org-target-link-regexps
|
||||
(org-element-cache-reset)
|
||||
(org-with-point-at 1
|
||||
(while (re-search-forward regexp nil t)
|
||||
(org-element-cache-refresh (match-beginning 1)))))))
|
||||
;; Re fontify buffer.
|
||||
(when (memq 'radio org-highlight-links)
|
||||
(org-restart-font-lock)))))
|
||||
|
|
|
@ -3848,7 +3848,9 @@ Assume point is at the beginning of the link."
|
|||
;; Type 1: Text targeted from a radio target.
|
||||
((and org-target-link-regexp
|
||||
(save-excursion (or (bolp) (backward-char))
|
||||
(looking-at org-target-link-regexp)))
|
||||
(if org-target-link-regexps
|
||||
(org--re-list-looking-at org-target-link-regexps)
|
||||
(looking-at org-target-link-regexp))))
|
||||
(setq type "radio")
|
||||
(setq format 'plain)
|
||||
(setq link-end (match-end 1))
|
||||
|
@ -5221,7 +5223,10 @@ to an appropriate container (e.g., a paragraph)."
|
|||
((not (memq 'link restriction)) nil)
|
||||
((progn
|
||||
(unless (bolp) (forward-char -1))
|
||||
(not (re-search-forward org-target-link-regexp nil t)))
|
||||
(not
|
||||
(if org-target-link-regexps
|
||||
(org--re-list-search-forward org-target-link-regexps nil t)
|
||||
(re-search-forward org-target-link-regexp nil t))))
|
||||
nil)
|
||||
;; Since we moved backward, we do not want to
|
||||
;; match again an hypothetical 1-character long
|
||||
|
@ -5230,8 +5235,11 @@ to an appropriate container (e.g., a paragraph)."
|
|||
;; beginning of line, we prevent this here.
|
||||
((and (= start (1+ (line-beginning-position)))
|
||||
(= start (match-end 1)))
|
||||
(and (re-search-forward org-target-link-regexp nil t)
|
||||
(1+ (match-beginning 1))))
|
||||
(and
|
||||
(if org-target-link-regexps
|
||||
(org--re-list-search-forward org-target-link-regexps nil t)
|
||||
(re-search-forward org-target-link-regexp nil t))
|
||||
(1+ (match-beginning 1))))
|
||||
(t (1+ (match-beginning 1))))))
|
||||
found)
|
||||
(save-excursion
|
||||
|
|
|
@ -5710,7 +5710,9 @@ by a #."
|
|||
;; `org-target-link-regexp' matches one character before the
|
||||
;; actual target.
|
||||
(unless (bolp) (forward-char -1))
|
||||
(when (re-search-forward org-target-link-regexp limit t)
|
||||
(when (if org-target-link-regexps
|
||||
(org--re-list-search-forward org-target-link-regexps limit t)
|
||||
(re-search-forward org-target-link-regexp limit t))
|
||||
(org-remove-flyspell-overlays-in (match-beginning 1) (match-end 1))
|
||||
(add-text-properties (match-beginning 1) (match-end 1)
|
||||
(list 'mouse-face 'highlight
|
||||
|
|
|
@ -2387,6 +2387,23 @@ e^{i\\pi}+1=0
|
|||
(org-element-property
|
||||
:type
|
||||
(org-element-map (org-element-parse-buffer) 'link #'identity nil t)))))
|
||||
(should
|
||||
(equal
|
||||
"radio"
|
||||
(org-test-with-temp-text "<<<radio>>><<<radio2>>><<<foo>>>A radio link"
|
||||
(org-update-radio-target-regexp)
|
||||
(org-element-property
|
||||
:type
|
||||
(org-element-map (org-element-parse-buffer) 'link #'identity nil t)))))
|
||||
(should
|
||||
(equal
|
||||
"radio"
|
||||
(let ((org-target-link-regexp-limit 9))
|
||||
(org-test-with-temp-text "<<<radio>>><<<radio2>>><<<foo>>>A radio link"
|
||||
(org-update-radio-target-regexp)
|
||||
(org-element-property
|
||||
:type
|
||||
(org-element-map (org-element-parse-buffer) 'link #'identity nil t))))))
|
||||
;; Pathological case: radio target of length 1 at beginning of line
|
||||
;; not followed by spaces.
|
||||
(should
|
||||
|
|
Loading…
Reference in New Issue