Move the "Nicer generated heading IDs" section

This commit is contained in:
TEC 2024-03-09 00:10:35 +08:00
parent 6087a89abb
commit 28e3bbf0ce
Signed by: tec
SSH Key Fingerprint: SHA256:eobz41Mnm0/iYWBvWThftS0ElEs1ftBr6jamutnXc/A
1 changed files with 214 additions and 217 deletions

View File

@ -8953,223 +8953,6 @@ with it.
(browse-url-xdg-open (format "http://localhost:%d" org-roam-ui-port))))
#+end_src
**** Nicer generated heading IDs
Thanks to alphapapa's [[https://github.com/alphapapa/unpackaged.el#export-to-html-with-useful-anchors][unpackaged.el]].
By default, Org generated heading IDs like =#org80fc2a5= which ... works, but has
two issues
+ It's completely uninformative, I have no idea what's being referenced
+ If I export the same file, everything will change.
Now, while without hardcoded values it's impossible to set references in
stone, it would be nice for there to be a decent chance of staying the same.
Both of these issues can be addressed by generating IDs like
=#language-configuration=, which is what I'll do here.
It's worth noting that alphapapa's use of ~url-hexify-string~ seemed to cause me
some issues. Replacing that in ~a53899~ resolved this for me. To go one step
further, I create a function for producing nice short links, like an inferior
version of ~reftex-label~.
#+begin_src emacs-lisp
(defvar org-reference-contraction-max-words 3
"Maximum number of words in a reference reference.")
(defvar org-reference-contraction-max-length 35
"Maximum length of resulting reference reference, including joining characters.")
(defvar org-reference-contraction-stripped-words
'("the" "on" "in" "off" "a" "for" "by" "of" "and" "is" "to")
"Superfluous words to be removed from a reference.")
(defvar org-reference-contraction-joining-char "-"
"Character used to join words in the reference reference.")
(defun org-reference-contraction-truncate-words (words)
"Using `org-reference-contraction-max-length' as the total character 'budget' for the WORDS
and truncate individual words to conform to this budget.
To arrive at a budget that accounts for words undershooting their requisite average length,
the number of characters in the budget freed by short words is distributed among the words
exceeding the average length. This adjusts the per-word budget to be the maximum feasable for
this particular situation, rather than the universal maximum average.
This budget-adjusted per-word maximum length is given by the mathematical expression below:
max length = \\floor{ \\frac{total length - chars for seperators - \\sum_{word \\leq average length} length(word) }{num(words) > average length} }"
;; trucate each word to a max word length determined by
;;
(let* ((total-length-budget (- org-reference-contraction-max-length ; how many non-separator chars we can use
(1- (length words))))
(word-length-budget (/ total-length-budget ; max length of each word to keep within budget
org-reference-contraction-max-words))
(num-overlong (-count (lambda (word) ; how many words exceed that budget
(> (length word) word-length-budget))
words))
(total-short-length (-sum (mapcar (lambda (word) ; total length of words under that budget
(if (<= (length word) word-length-budget)
(length word) 0))
words)))
(max-length (/ (- total-length-budget total-short-length) ; max(max-length) that we can have to fit within the budget
num-overlong)))
(mapcar (lambda (word)
(if (<= (length word) max-length)
word
(substring word 0 max-length)))
words)))
(defun org-reference-contraction (reference-string)
"Give a contracted form of REFERENCE-STRING that is only contains alphanumeric characters.
Strips 'joining' words present in `org-reference-contraction-stripped-words',
and then limits the result to the first `org-reference-contraction-max-words' words.
If the total length is > `org-reference-contraction-max-length' then individual words are
truncated to fit within the limit using `org-reference-contraction-truncate-words'."
(let ((reference-words
(-filter (lambda (word)
(not (member word org-reference-contraction-stripped-words)))
(split-string
(->> reference-string
downcase
(replace-regexp-in-string "\\[\\[[^]]+\\]\\[\\([^]]+\\)\\]\\]" "\\1") ; get description from org-link
(replace-regexp-in-string "[-/ ]+" " ") ; replace seperator-type chars with space
puny-encode-string
(replace-regexp-in-string "^xn--\\(.*?\\) ?-?\\([a-z0-9]+\\)$" "\\2 \\1") ; rearrange punycode
(replace-regexp-in-string "[^A-Za-z0-9 ]" "") ; strip chars which need %-encoding in a uri
) " +"))))
(when (> (length reference-words)
org-reference-contraction-max-words)
(setq reference-words
(cl-subseq reference-words 0 org-reference-contraction-max-words)))
(when (> (apply #'+ (1- (length reference-words))
(mapcar #'length reference-words))
org-reference-contraction-max-length)
(setq reference-words (org-reference-contraction-truncate-words reference-words)))
(string-join reference-words org-reference-contraction-joining-char)))
#+end_src
Now here's alphapapa's subtly tweaked mode.
#+begin_src emacs-lisp
(define-minor-mode unpackaged/org-export-html-with-useful-ids-mode
"Attempt to export Org as HTML with useful link IDs.
Instead of random IDs like \"#orga1b2c3\", use heading titles,
made unique when necessary."
:global t
(if unpackaged/org-export-html-with-useful-ids-mode
(advice-add #'org-export-get-reference :override #'unpackaged/org-export-get-reference)
(advice-remove #'org-export-get-reference #'unpackaged/org-export-get-reference)))
(unpackaged/org-export-html-with-useful-ids-mode 1) ; ensure enabled, and advice run
(defun unpackaged/org-export-get-reference (datum info)
"Like `org-export-get-reference', except uses heading titles instead of random numbers."
(let ((cache (plist-get info :internal-references)))
(or (car (rassq datum cache))
(let* ((crossrefs (plist-get info :crossrefs))
(cells (org-export-search-cells datum))
;; Preserve any pre-existing association between
;; a search cell and a reference, i.e., when some
;; previously published document referenced a location
;; within current file (see
;; `org-publish-resolve-external-link').
;;
;; However, there is no guarantee that search cells are
;; unique, e.g., there might be duplicate custom ID or
;; two headings with the same title in the file.
;;
;; As a consequence, before re-using any reference to
;; an element or object, we check that it doesn't refer
;; to a previous element or object.
(new (or (cl-some
(lambda (cell)
(let ((stored (cdr (assoc cell crossrefs))))
(when stored
(let ((old (org-export-format-reference stored)))
(and (not (assoc old cache)) stored)))))
cells)
(when (org-element-property :raw-value datum)
;; Heading with a title
(unpackaged/org-export-new-named-reference datum cache))
(when (member (car datum) '(src-block table example fixed-width property-drawer))
;; Nameable elements
(unpackaged/org-export-new-named-reference datum cache))
;; NOTE: This probably breaks some Org Export
;; feature, but if it does what I need, fine.
(org-export-format-reference
(org-export-new-reference cache))))
(reference-string new))
;; Cache contains both data already associated to
;; a reference and in-use internal references, so as to make
;; unique references.
(dolist (cell cells) (push (cons cell new) cache))
;; Retain a direct association between reference string and
;; DATUM since (1) not every object or element can be given
;; a search cell (2) it permits quick lookup.
(push (cons reference-string datum) cache)
(plist-put info :internal-references cache)
reference-string))))
(defun unpackaged/org-export-new-named-reference (datum cache)
"Return new reference for DATUM that is unique in CACHE."
(cl-macrolet ((inc-suffixf (place)
`(progn
(string-match (rx bos
(minimal-match (group (1+ anything)))
(optional "--" (group (1+ digit)))
eos)
,place)
;; HACK: `s1' instead of a gensym.
(-let* (((s1 suffix) (list (match-string 1 ,place)
(match-string 2 ,place)))
(suffix (if suffix
(string-to-number suffix)
0)))
(setf ,place (format "%s--%s" s1 (cl-incf suffix)))))))
(let* ((headline-p (eq (car datum) 'headline))
(title (if headline-p
(org-element-property :raw-value datum)
(or (org-element-property :name datum)
(concat (org-element-property :raw-value
(org-element-property :parent
(org-element-property :parent datum)))))))
;; get ascii-only form of title without needing percent-encoding
(ref (concat (org-reference-contraction (substring-no-properties title))
(unless (or headline-p (org-element-property :name datum))
(concat ","
(pcase (car datum)
('src-block "code")
('example "example")
('fixed-width "mono")
('property-drawer "properties")
(_ (symbol-name (car datum))))
"--1"))))
(parent (when headline-p (org-element-property :parent datum))))
(while (--any (equal ref (car it))
cache)
;; Title not unique: make it so.
(if parent
;; Append ancestor title.
(setf title (concat (org-element-property :raw-value parent)
"--" title)
;; get ascii-only form of title without needing percent-encoding
ref (org-reference-contraction (substring-no-properties title))
parent (when headline-p (org-element-property :parent parent)))
;; No more ancestors: add and increment a number.
(inc-suffixf ref)))
ref)))
(add-hook 'org-load-hook #'unpackaged/org-export-html-with-useful-ids-mode)
#+end_src
We also need to redefine src_elisp{(org-export-format-reference)} as it now may
be passed a string as well as a number.
#+begin_src emacs-lisp
(defadvice! org-export-format-reference-a (reference)
"Format REFERENCE into a string.
REFERENCE is a either a number or a string representing a reference,
as returned by `org-export-new-reference'."
:override #'org-export-format-reference
(if (stringp reference) reference (format "org%07x" reference)))
#+end_src
**** Nicer ~org-return~
Once again, from [[https://github.com/alphapapa/unpackaged.el#org-return-dwim][unpackaged.el]]
@ -10118,6 +9901,220 @@ TODO abstract backend implementations."
(setq org-latex-format-headline-function #'org-latex-format-headline-acronymised)
#+end_src
**** Nicer generated heading IDs
Thanks to alphapapa's [[https://github.com/alphapapa/unpackaged.el#export-to-html-with-useful-anchors][unpackaged.el]].
By default, Org generated heading IDs like =#org80fc2a5= which ... works, but has
two issues
+ It's completely uninformative, I have no idea what's being referenced
+ If I export the same file, everything will change.
Now, while without hardcoded values it's impossible to set references in
stone, it would be nice for there to be a decent chance of staying the same.
Both of these issues can be addressed by generating IDs like
=#language-configuration=, which is what I'll do here.
It's worth noting that alphapapa's use of ~url-hexify-string~ seemed to cause me
some issues. Replacing that in ~a53899~ resolved this for me. To go one step
further, I create a function for producing nice short links, like an inferior
version of ~reftex-label~.
#+begin_src emacs-lisp
(defvar org-reference-contraction-max-words 3
"Maximum number of words in a reference reference.")
(defvar org-reference-contraction-max-length 35
"Maximum length of resulting reference reference, including joining characters.")
(defvar org-reference-contraction-stripped-words
'("the" "on" "in" "off" "a" "for" "by" "of" "and" "is" "to")
"Superfluous words to be removed from a reference.")
(defvar org-reference-contraction-joining-char "-"
"Character used to join words in the reference reference.")
(defun org-reference-contraction-truncate-words (words)
"Using `org-reference-contraction-max-length' as the total character 'budget' for the WORDS
and truncate individual words to conform to this budget.
To arrive at a budget that accounts for words undershooting their requisite average length,
the number of characters in the budget freed by short words is distributed among the words
exceeding the average length. This adjusts the per-word budget to be the maximum feasable for
this particular situation, rather than the universal maximum average.
This budget-adjusted per-word maximum length is given by the mathematical expression below:
max length = \\floor{ \\frac{total length - chars for seperators - \\sum_{word \\leq average length} length(word) }{num(words) > average length} }"
;; trucate each word to a max word length determined by
;;
(let* ((total-length-budget (- org-reference-contraction-max-length ; how many non-separator chars we can use
(1- (length words))))
(word-length-budget (/ total-length-budget ; max length of each word to keep within budget
org-reference-contraction-max-words))
(num-overlong (-count (lambda (word) ; how many words exceed that budget
(> (length word) word-length-budget))
words))
(total-short-length (-sum (mapcar (lambda (word) ; total length of words under that budget
(if (<= (length word) word-length-budget)
(length word) 0))
words)))
(max-length (/ (- total-length-budget total-short-length) ; max(max-length) that we can have to fit within the budget
num-overlong)))
(mapcar (lambda (word)
(if (<= (length word) max-length)
word
(substring word 0 max-length)))
words)))
(defun org-reference-contraction (reference-string)
"Give a contracted form of REFERENCE-STRING that is only contains alphanumeric characters.
Strips 'joining' words present in `org-reference-contraction-stripped-words',
and then limits the result to the first `org-reference-contraction-max-words' words.
If the total length is > `org-reference-contraction-max-length' then individual words are
truncated to fit within the limit using `org-reference-contraction-truncate-words'."
(let ((reference-words
(cl-remove-if-not
(lambda (word)
(not (member word org-reference-contraction-stripped-words)))
(let ((str reference-string))
(setq str (downcase str))
(setq str (replace-regexp-in-string "\\[\\[[^]]+\\]\\[\\([^]]+\\)\\]\\]" "\\1" str)) ; get description from org-link
(setq str (replace-regexp-in-string "[-/ ]+" " " str)) ; replace seperator-type chars with space
(setq str (puny-encode-string str))
(setq str (replace-regexp-in-string "^xn--\\(.*?\\) ?-?\\([a-z0-9]+\\)$" "\\2 \\1" str)) ; rearrange punycode
(setq str (replace-regexp-in-string "[^A-Za-z0-9 ]" "" str)) ; strip chars which need %-encoding in a uri
(split-string str " +")))))
(when (> (length reference-words)
org-reference-contraction-max-words)
(setq reference-words
(cl-subseq reference-words 0 org-reference-contraction-max-words)))
(when (> (apply #'+ (1- (length reference-words))
(mapcar #'length reference-words))
org-reference-contraction-max-length)
(setq reference-words (org-reference-contraction-truncate-words reference-words)))
(string-join reference-words org-reference-contraction-joining-char)))
#+end_src
Now here's alphapapa's subtly tweaked mode.
#+begin_src emacs-lisp
(define-minor-mode unpackaged/org-export-html-with-useful-ids-mode
"Attempt to export Org as HTML with useful link IDs.
Instead of random IDs like \"#orga1b2c3\", use heading titles,
made unique when necessary."
:global t
(if unpackaged/org-export-html-with-useful-ids-mode
(advice-add #'org-export-get-reference :override #'unpackaged/org-export-get-reference)
(advice-remove #'org-export-get-reference #'unpackaged/org-export-get-reference)))
(unpackaged/org-export-html-with-useful-ids-mode 1) ; ensure enabled, and advice run
(defun unpackaged/org-export-get-reference (datum info)
"Like `org-export-get-reference', except uses heading titles instead of random numbers."
(let ((cache (plist-get info :internal-references)))
(or (car (rassq datum cache))
(let* ((crossrefs (plist-get info :crossrefs))
(cells (org-export-search-cells datum))
;; Preserve any pre-existing association between
;; a search cell and a reference, i.e., when some
;; previously published document referenced a location
;; within current file (see
;; `org-publish-resolve-external-link').
;;
;; However, there is no guarantee that search cells are
;; unique, e.g., there might be duplicate custom ID or
;; two headings with the same title in the file.
;;
;; As a consequence, before re-using any reference to
;; an element or object, we check that it doesn't refer
;; to a previous element or object.
(new (or (cl-some
(lambda (cell)
(let ((stored (cdr (assoc cell crossrefs))))
(when stored
(let ((old (org-export-format-reference stored)))
(and (not (assoc old cache)) stored)))))
cells)
(when (org-element-property :raw-value datum)
;; Heading with a title
(unpackaged/org-export-new-named-reference datum cache))
(when (member (car datum) '(src-block table example fixed-width property-drawer))
;; Nameable elements
(unpackaged/org-export-new-named-reference datum cache))
;; NOTE: This probably breaks some Org Export
;; feature, but if it does what I need, fine.
(org-export-format-reference
(org-export-new-reference cache))))
(reference-string new))
;; Cache contains both data already associated to
;; a reference and in-use internal references, so as to make
;; unique references.
(dolist (cell cells) (push (cons cell new) cache))
;; Retain a direct association between reference string and
;; DATUM since (1) not every object or element can be given
;; a search cell (2) it permits quick lookup.
(push (cons reference-string datum) cache)
(plist-put info :internal-references cache)
reference-string))))
(defun unpackaged/org-export-new-named-reference (datum cache)
"Return new reference for DATUM that is unique in CACHE."
(cl-macrolet ((inc-suffixf (place)
`(progn
(string-match (rx bos
(minimal-match (group (1+ anything)))
(optional "--" (group (1+ digit)))
eos)
,place)
;; HACK: `s1' instead of a gensym.
(let* ((s1 (match-string 1 ,place))
(suffix-1 (match-string 2 ,place))
(suffix (if suffix-1 (string-to-number suffix-1) 0)))
(setf ,place (format "%s--%s" s1 (1+ suffix)))))))
(let* ((headline-p (eq (car datum) 'headline))
(title (if headline-p
(org-element-property :raw-value datum)
(or (org-element-property :name datum)
(concat (org-element-property :raw-value
(org-element-property :parent
(org-element-property :parent datum)))))))
;; get ascii-only form of title without needing percent-encoding
(ref (concat (org-reference-contraction (substring-no-properties title))
(unless (or headline-p (org-element-property :name datum))
(concat ","
(pcase (car datum)
('src-block "code")
('example "example")
('fixed-width "mono")
('property-drawer "properties")
(_ (symbol-name (car datum))))
"--1"))))
(parent (when headline-p (org-element-property :parent datum))))
(while (member ref (mapcar #'car cache))
;; Title not unique: make it so.
(if parent
;; Append ancestor title.
(setf title (concat (org-element-property :raw-value parent)
"--" title)
;; get ascii-only form of title without needing percent-encoding
ref (org-reference-contraction (substring-no-properties title))
parent (when headline-p (org-element-property :parent parent)))
;; No more ancestors: add and increment a number.
(inc-suffixf ref)))
ref)))
(add-hook 'org-load-hook #'unpackaged/org-export-html-with-useful-ids-mode)
#+end_src
We also need to redefine src_elisp{(org-export-format-reference)} as it now may
be passed a string as well as a number.
#+begin_src emacs-lisp
(defadvice! org-export-format-reference-a (reference)
"Format REFERENCE into a string.
REFERENCE is a either a number or a string representing a reference,
as returned by `org-export-new-reference'."
:override #'org-export-format-reference
(if (stringp reference) reference (format "org%07x" reference)))
#+end_src
**** Strip zero width spaces
Zero width spaces are handy as a semantic separator, but not something we want