Initial commit

This commit is contained in:
TEC 2024-03-28 01:42:37 +08:00
commit d686f8285d
Signed by: tec
SSH Key Fingerprint: SHA256:eobz41Mnm0/iYWBvWThftS0ElEs1ftBr6jamutnXc/A
2 changed files with 594 additions and 0 deletions

119
README.org Normal file
View File

@ -0,0 +1,119 @@
#+title: Autocorrect
#+subtitle: Spellchecker-agnostic autocorrect for Emacs
#+author: tecosaur
If you want to write without looking like you skipped a chunk of
primary/secondary school (as I do), then autocorrect is a handy thing to have.
Beyond just misspellings, it can also help with typos, and lazy capitalisation
(can you really be bothered to consistently type "LuaLaTeX" instead of
"lualatex" and "SciFi" over "scifi"?). However, primarily thanks to smartphones,
I more often hear people cursing autocorrect than praising it. With that in
mind, I think it's worth giving some thought to how smartphone autocorrect gets
its bad reputation (despite largely doing a decent job):
1. Typing is harder on smartphones, and so autocorrect makes bigger (more speculative) guesses
2. People type (and mistype) differently, but autocorrect tries to have a "one
size fits all" profile that is refined over time
3. As soon as you accept a particular correction, autocorrect can start applying
that even when the original typo is ambiguous and has multiple "corrected" forms
4. It's hard to tell the phone to stop doing a particular autocorrect (see
"Emacs" recapitalised as "eMacs" on Apple devices)
I think we can largely alleviate these problems by
1. Being mainly used on devices with actual keyboards
2. Starting with an empty autocorrect "profile", built up by the user over time
3. Having a customisable threshold before a repeated correction is made into an
autocorrection, and blacklisting misspellings with multiple distinct corrections.
4. Making it easy to blacklist certain words from becoming autocorrections
Another complaint about autocorrect is that it lets you develop bad habits, and
if anything a tool that got you to retype the correct spelling several times
would be more valuable in the long run. I think this is a pretty reasonable
complaint, and have two different trains of thought that both justify tracking
corrections made:
+ I almost never leave Emacs for writing more than a text message, so what if I
type worse outside of it?
+ By tracking corrections made, you can also make a personal "most common
misspellings" training list to run through at your leasure or when committing
a misspelling. Just set the "minimum replacement count" to a stupidly high
number and optionally make use of ~autocorrect-post-correct-hook~.
* Activation
Some time soon after startup, run ~autocorrect-setup~. I like to do this with an
idle timer.
#+begin_src emacs-lisp
(run-with-idle-timer 0.5 nil #'autocorrect-setup)
#+end_src
Remember to hook up the spell checker you're using, make sure =abbrev= mode is on,
and you're good to go.
* Spellchecker integration
** Generic
=autocorrect= needs to be told when a spelling correction has been made. This
should be done through the function ~autocorrect-record-correction~.
At a bare minimum, just invoking ~autocorrect-record-correction~ appropriately
will make =autocorrect= start working, however there are two more optional steps
to integration that can enhance the experience.
1. Set ~autocorrect-check-spelling-function~ so that letter casing is handled a bit better
2. Set ~autocorrect-predicates~ to control where corrections can occur
** Jinx
#+begin_src emacs-lisp
(defun autocorrect-jinx-record-correction (overlay corrected)
"Record that Jinx corrected the text in OVERLAY to CORRECTED."
(let ((text
(buffer-substring-no-properties
(overlay-start overlay)
(overlay-end overlay))))
(autocorrect-record-correction text corrected)))
(defun autocorrect-jinx-check-spelling (word)
"Check if WORD is valid."
;; Mostly a copy of `jinx--word-valid-p', just without the buffer substring.
;; It would have been nice if `jinx--word-valid-p' implemented like this
;; with `jinx--this-word-valid-p' (or similar) as the at-point variant.
(or (member word jinx--session-words)
;; Allow capitalized words
(and (string-match-p "\\`[[:upper:]][[:lower:]]+\\'" word)
(cl-loop
for w in jinx--session-words
thereis (and (string-equal-ignore-case word w)
(string-match-p "\\`[[:lower:]]+\\'" w))))
(cl-loop for dict in jinx--dicts
thereis (jinx--mod-check dict word))))
(defun autocorrect-jinx-appropriate (pos)
"Return non-nil if it is appropriate to spellcheck at POS according to jinx."
(and (not (jinx--face-ignored-p pos))
(not (jinx--regexp-ignored-p pos))))
(setq autocorrect-check-spelling-function #'autocorrect-jinx-check-spelling)
(add-to-list 'autocorrect-predicates #'autocorrect-jinx-appropriate)
(advice-add 'jinx--correct-replace :before #'autocorrect-jinx-record-correction)
#+end_src
** Flyspell
#+begin_src emacs-lisp
(defvar-local autocorrect-flyspell-misspelling)
(defun autocorrect-flyspell-insert (word)
"Insert WORD and record the correction with autocorrect.el."
(autocorrect-record-correction
(or autocorrect-flyspell-misspelling flyspell-auto-correct-word)
word)
(insert word))
(defun autocorrect--flyspell-do-correct-a (oldfun replace poss word cursor-location start end save)
"Wraps `flyspell-do-correct' to store the word it's correcting."
(let ((autocorrect-flyspell-misspelling word))
(funcall oldfun replace poss word cursor-location start end save)))
(setq flyspell-insert-function autocorrect-flyspell-insert)
(advice-add 'flyspell-do-correct :around #'autocorrect--flyspell-do-correct-a)
#+end_src

475
autocorrect.el Normal file
View File

@ -0,0 +1,475 @@
;;; autocorrect.el --- Spellchecker agnostic autocorrection -*- lexical-binding: t; -*-
;;
;; Copyright (C) 2024 TEC
;;
;; Author: TEC <contact@tecosaur.net>
;; Maintainer: TEC <contact@tecosaur.net>
;; Created: March 27, 2024
;; Modified: March 27, 2024
;; Version: 0.1.0
;; Keywords: convenience, text
;; Homepage: https://code.tecosaur.net/tec/autocorrect.el
;; Package-Requires: ((emacs "29.1"))
;;
;; This file is not part of GNU Emacs.
;;
;;; Commentary:
;;
;; Spellchecker agnostic autocorrect in Emacs
;;
;;; Code:
(eval-when-compile
(require 'subr-x))
(defgroup autocorrect nil
"Automatically fix typos and frequent spelling mistakes."
:group 'text
:prefix "autocorrect-")
;;;; Customisation variables:
;; To record corrections made, we can just use a simple text file.
;;
;; For simplicity of operation, I think we can just append each correction the
;; file as "misspelled corrected" lines. This has a number of advantages, such
;; as avoiding recalculations while typing, avoiding race conditions with
;; multiple Emacs sessions, and making merging data on different machines
;; trivial.
;;
;; In the Emacs session though, I think we'll want to have a hash table of the
;; counts of each correction. We can have the misspelled words as the keys, and
;; then have each value be an alist of ~(correction . count)~ pairs. This table
;; can be lazily built and processed after startup.
(defcustom autocorrect-file
(file-name-concat (or (getenv "XDG_STATE_HOME") "~/.local/state")
"emacs" "autocorrections.txt")
"File where a record of spell checks and autocorrections is saved.
Each line of this file is of the form:
MISSPELLING [MANUAL-CORRECTION-COUNT AUTOCORRECTION-COUNT] CORRECTED
Where MANUAL-CORRECTION-COUNT and AUTOCORRECTION-COUNT are
optional (indicated by the brackets)."
:type 'file)
(defvar autocorrect--table (make-hash-table :test #'equal)
"A record of all corrections made, generated from `autocorrect-file'.
Misspelled words are the keys, and a alist of corrections and their counts are
the values.")
;; We probably want to also specify a threshold number of misspellings that
;; trigger entry to the abbrev table, both on load and when made during the
;; current Emacs session. For now, I'll try a value of three for on-load and two
;; for misspellings made in the current Emacs session. I think I want to avoid a
;; value of one since that makes it easy for a misspelling with multiple valid
;; corrections to become associated with a single correction too soon. This is a
;; rare concern, but it would be annoying enough to run into that I think it's
;; worth requiring a second misspelling.
(defcustom autocorrect-count-threshold-alltime 3
"The number of recorded identical misspellings to create an abbrev.
This applies to misspellings read from `autocorrect-file'."
:type 'natnum)
(defcustom autocorrect-count-threshold-session 2
"The number of identical misspellings to create an abbrev.
This applies to misspellings made in the current Emacs session."
:type 'natnum)
;; It's probably not sensible to do autocorrection anywhere,
;; and predicate functions are a flexible way of allowing this to be specified.
(defcustom autocorrect-predicates nil
"Predicate functions called at point with argument START.
These functions should return t if autocorrection is valid at START."
:type '(repeat function))
(defcustom autocorrect-post-correct-hook nil
"Hook run after an autocorrection has occurred.
Each function is called with two arguments, the original misspelling and the
correction inserted.
In some cases (when the correction consists of multiple words), it is not
possible to determine this information and this hook will not be run.
This limitation stems from the how abbrev implements post-insertion hooks."
:type 'hook)
;; When we handle just-performed spelling corrections, if the word is capitalised
;; it could either be because:
;; - It is appearing at the start of the sentence
;; - It is a proper noun, and should always be capitalised
;;
;; We want to differentiate these two cases, which we can do by converting the
;; corrected word to lowercase and testing whether that form is spellchecked as
;; correct.
(defcustom autocorrect-check-spelling-function nil
"Predicate function that indicates whether a word is correctly spelt.
This is used to check whether a correction can be safely lowercased."
:type '(choice function (const nil)))
;;;; Internal variables:
(defvar autocorrect--abbrev-table nil
"The spelling abbrev table.")
(defvar autocorrect--abbrev-table-saved-version 0
"The version of `autocorrect--abbrev-table' saved to disk.")
(defvar autocorrect--file-mtime (seconds-to-time 0)
"The mtime of `autocorrect-file' last time it was looked at.")
(defvar autocorrect--should-update-savefile nil
"Indicator for whether there are any changes that should be written on save.")
;;;; Minor mode:
;;;###autoload
(define-minor-mode autocorrect-mode
"Automatically correct misspellings with abbrev."
:init-value t)
;;;###autoload
(define-globalized-minor-mode global-autocorrect-mode
autocorrect-mode autocorrect--enable)
(defun autocorrect--enable ()
"Turn on `autocorrect-mode' in the current buffer."
(autocorrect-mode 1))
;;;; Utility functions:
(defun autocorrect--appropriate-p ()
"Return non-nil it is currently appropriate to make an autocorrection.
See `autocorrect-predicates'."
(and autocorrect-mode
(run-hook-with-args-until-failure 'autocorrect-predicates (point))))
(defun autocorrect--run-post-correct-hook ()
"Determine the correction, and run `autocorrect-post-correct-hook'.
It is expected that this is run as a post-abbrev hook within `abbrev-insert'.
This assumption allows us to determine the newly inserted correction, so long as
it is a single word, and then perform a reverse lookup of `autocorrect--table'
to find the original misspelling."
(when autocorrect-post-correct-hook
(let ((correction (substring-no-properties (thing-at-point 'word)))
misspelling)
(maphash
(lambda (misp corrections)
(unless misspelling
(when (equal (caar corrections) correction)
(setq misspelling misp))))
autocorrect--table)
(when misspelling
(run-hook-with-args 'autocorrect-post-correct-hook
misspelling correction)))))
(defun autocorrect--should-downcase-p (word)
"Check whether it is a good idea to `downcase' WORD.
This is conditional on all of the following being true:
- WORD starts with a capital letter
- The rest of WORD is either entirely lower or upper case
(i.e. WORD is like \"Capitalised\" or \"UPPERCASE\")
- The lowercase form of WORD satisfies `autocorrect-check-spelling-function'"
(and autocorrect-check-spelling-function
(char-uppercase-p (aref word 0))
;; To check whether a function is indeed lowercase we'll try using
;; ~char-uppercase-p~ instead of Regexp for speed (I think but haven't
;; tested that this will be faster).
(let ((letter-cases (mapcar #'char-uppercase-p word)))
(or (not (memq t (cdr letter-cases)))
(not (memq nil (cdr letter-cases)))))
(funcall autocorrect-check-spelling-function
(downcase word))))
(defun autocorrect--update-table (misspelling corrected &optional manual-count auto-count)
"Update the MISSPELLING to CORRECTED entry in the table.
Unless specified, it is assumed that MANUAL-COUNT is 1 and
AUTO-COUNT is 0, referring to the number of times MISSPELLING has
been manually and automatically corrected to CORRECTED
respectively."
(let* ((correction-counts
(gethash misspelling autocorrect--table))
(record-cons
(assoc corrected correction-counts))
(manual-count (or manual-count 1))
(auto-count (or auto-count 0)))
(if record-cons
(setcdr record-cons
(cons (+ (cadr record-cons) manual-count)
(+ (cddr record-cons) auto-count)))
(puthash misspelling
(let ((cinfo (cons corrected (cons manual-count auto-count))))
(if correction-counts
(push cinfo correction-counts)
(list cinfo)))
autocorrect--table))))
;;;; Abbrev management:
(defun autocorrect--set-abbrev (misspelling corrected)
"Create an abbrev from MISSPELLING to CORRECTED in `autocorrect--abbrev-table'."
(let ((sym (obarray-put autocorrect--abbrev-table misspelling)))
(unless (and (boundp sym) (equal (symbol-value sym) corrected))
(set sym corrected)
(fset sym #'autocorrect--run-post-correct-hook)
(setplist sym (list :count 0 :system t))
(abbrev-table-put
autocorrect--abbrev-table :abbrev-table-modiff
(1+ (abbrev-table-get autocorrect--abbrev-table :abbrev-table-modiff))))))
(defun autocorrect--unset-abbrev (misspelling)
"Remove any abbrevs associated with MISSPELLING in `autocorrect--abbrev-table'."
(let ((sym (obarray-get autocorrect--abbrev-table misspelling)))
(when sym (obarray-remove autocorrect--abbrev-table sym))))
(defun autocorrect--setup-abbrevs ()
"Setup `autocorrect--abbrev-table'.
Also set it as a parent of `global-abbrev-table'."
(unless autocorrect--abbrev-table
(setq autocorrect--abbrev-table
(make-abbrev-table (list :enable-function #'autocorrect--appropriate-p)))
(abbrev-table-put
global-abbrev-table :parents
(cons autocorrect--abbrev-table
(abbrev-table-get global-abbrev-table :parents)))))
(defun autocorrect--create-abbrevs ()
"Apply the history threshold to the current correction table."
(maphash
(lambda (misspelling corrections)
(when (and (= (length corrections) 1)
(>= (cadar corrections)
autocorrect-count-threshold-alltime))
(autocorrect--set-abbrev misspelling (caar corrections))))
autocorrect--table))
(defun autocorrect--transfer-abbrev-counts-to-table ()
"Transfer autocorrect counts from the abbrev table to the record table.
More specifically, this finds all entries of `autocorrect--abbrev-table'
with a non-zero :count field, adds that value to the autocorrect count via
`autocorrect--update-table', and zeros the :count field."
(let (symbols)
(mapatoms
(lambda (sym)
(push sym symbols))
autocorrect--abbrev-table)
(dolist (sym symbols)
(when (and (gethash (symbol-name sym) autocorrect--table)
(> (get sym :count) 0))
(autocorrect--update-table
(symbol-name sym) (symbol-value sym)
0 (get sym :count))
(put sym :count 0)))))
(defun autocorrect--remove-invalid-abbrevs ()
"Remove entries of `autocorrect--abbrev-table' not in `autocorrect--table'."
(mapatoms
(lambda (symb)
(when (symbol-value symb)
(let ((misspelling (symbol-name symb)))
(let ((corrections (gethash misspelling autocorrect--table)))
(unless (and (= (length corrections) 1)
(>= (cadar corrections)
autocorrect-count-threshold-alltime))
(obarray-remove autocorrect--abbrev-table symb))))))
autocorrect--abbrev-table))
(defun autocorrect--sync-abbrevs ()
"Synchronise `autocorrect--abbrev-table' with `autocorrect--table'."
(autocorrect--remove-invalid-abbrevs)
(autocorrect--create-abbrevs))
;;;; History management:
(defun autocorrect--read ()
"Read `autocorrect-file' into the correction table."
(unless (hash-table-empty-p autocorrect--table)
(setq autocorrect--table (make-hash-table :test #'equal)))
(if (file-exists-p autocorrect-file)
(with-temp-buffer
(insert-file-contents autocorrect-file)
(setq autocorrect--file-mtime
(file-attribute-modification-time (file-attributes autocorrect-file)))
(goto-char (point-min))
(let ((pt (point))
misspelling next-word
corrected manual-count auto-count)
(while (< (point) (point-max))
(setq misspelling
(and (forward-word)
(buffer-substring pt (point)))
pt (1+ (point)))
(setq next-word
(or (and (forward-word)
(buffer-substring pt (point)))
""))
(setq manual-count (string-to-number next-word))
(if (and (= manual-count 0) (not (string= next-word "0")))
(setq corrected
(if (eolp) next-word (buffer-substring pt (pos-eol)))
manual-count 1
auto-count 0)
(setq pt (1+ (point))
next-word
(or (and (forward-word)
(buffer-substring pt (point)))
""))
(setq auto-count (string-to-number next-word)
corrected
(if (and (= auto-count 0) (not (string= next-word "0")))
(if (eolp) next-word (buffer-substring pt (pos-eol)))
(buffer-substring (1+ (point)) (pos-eol)))))
(forward-line 1)
(setq pt (point))
(when (and misspelling corrected)
(autocorrect--update-table
misspelling corrected manual-count auto-count)))))
(make-directory (file-name-directory autocorrect-file))
(write-region "" nil autocorrect-file)))
(defun autocorrect--write ()
"Write the current `autocorrect--table' to `autocorrect-file'."
(let ((write-region-inhibit-fsync t) ; Quicker writes, not needed
(coding-system-for-write 'utf-8)
(inhibit-message t))
(with-temp-buffer
(maphash
(lambda (misspelling corrections)
(dolist (correction corrections)
(insert misspelling
" " (number-to-string (cadr correction))
" " (number-to-string (cddr correction))
" " (car correction)
"\n")))
autocorrect--table)
(write-region nil nil autocorrect-file))))
(defun autocorrect-save ()
"Save the current autocorrect information."
(while (time-less-p
autocorrect--file-mtime
(file-attribute-modification-time (file-attributes autocorrect-file)))
(autocorrect--read))
(autocorrect--transfer-abbrev-counts-to-table)
(autocorrect--write))
;;;; UI
(defun autocorrect-ignore-word (word)
"Prevent WORD from being autocorrected."
(interactive
(list (completing-read
"Word: " (sort (hash-table-keys autocorrect--table) #'string<))))
(autocorrect--update-table word "" 0 0)
(autocorrect--unset-abbrev word))
(defconst autocorrect--list-format
`[("Misspelling" 16
,(lambda (a b)
(string< (aref (cadr a) 0) (aref (cadr b) 0))))
("" 1 nil)
("Correction" 16
,(lambda (a b)
(string< (aref (cadr a) 2) (aref (cadr b) 2))))
("#Manual" 8
,(lambda (a b)
(< (string-to-number (aref (cadr a) 3))
(string-to-number (aref (cadr b) 3))))
:right-align t)
("#Auto" 6
,(lambda (a b)
(< (string-to-number (aref (cadr a) 4))
(string-to-number (aref (cadr b) 4))))
:right-align t)]
"Table format for the error list.")
(defun autocorrect--list-entries ()
"Generated a list of current autocorrections for `autocorrect-list-mode'."
(let (entries)
(maphash
(lambda (misspelling corrections)
(dolist (correction corrections)
(push (list nil
(vector
(propertize misspelling 'face 'default)
""
(propertize (car correction) 'face 'success)
(propertize
(number-to-string (cadr correction))
'face (if (= (cadr correction) 0)
'shadow
'font-lock-number-face))
(propertize
(number-to-string (cddr correction))
'face (if (= (cddr correction) 0)
'shadow
'font-lock-number-face))))
entries)))
autocorrect--table)
entries))
(define-derived-mode autocorrect-list-mode tabulated-list-mode
"Autocorrections"
"Major mode for listing autocorrections.
\\{autocorrect-list-mode-map}"
(setq tabulated-list-format autocorrect--list-format
tabulated-list-sort-key (cons "Misspelling" nil)
tabulated-list-padding 1
tabulated-list-entries #'autocorrect--list-entries)
(setq-local truncate-string-ellipsis "")
(tabulated-list-init-header))
(defun autocorrect-list ()
"List loaded autocorrections."
(interactive)
(let ((buffer (get-buffer-create "*Autocorrections*")))
(with-current-buffer buffer
(autocorrect-list-mode)
(tabulated-list-print))
(switch-to-buffer buffer)))
;;;; Spellchecker interface:
(defun autocorrect--maybe-create-abbrev (misspelling corrected)
"Update the autocorrect table and possibly create an abbrev.
The correction count for MISSPELLING to CORRECTED in the table is incremented by
one, and should the number of corrections exceed
`autocorrect-count-threshold-session' and there be no other recorded
corrections, and abbrev will be created."
(when (and (>= (or (cadr (assoc corrected (gethash misspelling autocorrect--table))) 0)
autocorrect-count-threshold-session)
(= 1 (length (gethash misspelling autocorrect--table))))
(autocorrect--set-abbrev misspelling corrected)
(message "Created new autocorrection: %s ⟶ %s"
(propertize misspelling 'face 'warning)
(propertize corrected 'face 'success))))
(defun autocorrect-record-correction (misspelling corrected)
"Record the correction of MISSPELLING to CORRECTED."
(when (autocorrect--should-downcase-p corrected)
(setq misspelling (downcase misspelling)
corrected (downcase corrected)))
(let ((write-region-inhibit-fsync t) ; Quicker writes, not needed
(coding-system-for-write 'utf-8)
(inhibit-message t))
(write-region
(concat misspelling " " corrected "\n") nil
autocorrect-file t))
(autocorrect--update-table misspelling corrected)
(autocorrect--maybe-create-abbrev misspelling corrected))
;;;; Setup:
;;;###autoload
(defun autocorrect-setup ()
"Read and process the history file into abbrevs."
(autocorrect--setup-abbrevs)
(autocorrect--read)
(autocorrect--sync-abbrevs)
(add-hook 'kill-emacs-hook #'autocorrect-save))
(provide 'autocorrect)
;;; autocorrect.el ends here