Elaborate on character usage/meaning

This commit is contained in:
TEC 2024-04-10 22:47:11 +08:00
parent b0c7025372
commit 70871a6b60
Signed by: tec
SSH Key Fingerprint: SHA256:eobz41Mnm0/iYWBvWThftS0ElEs1ftBr6jamutnXc/A
1 changed files with 73 additions and 1 deletions

View File

@ -224,6 +224,78 @@ function memorylayout(io::IO, char::Char)
end
end
# TODO char
"""
    CONTROL_CHARACTERS

Table of the ASCII control characters (`0x00` to `0x1f`, plus `0x7f`).

Each entry is a pair `char => (abbreviation, name, description)`, where
`description` may be an empty string when there is nothing further to say.

The table is deliberately a tuple of pairs (not a `Dict`): it is searched
positionally with `findfirst` and then indexed with the resulting position.
"""
const CONTROL_CHARACTERS = (
    '\x00' => ("NULL", "Null character", "Originally the code of blank paper tape and used as padding to slow transmission. Now often used to indicate the end of a string in C-like languages."),
    '\x01' => ("SOH", "Start of Heading", ""),
    # Standard abbreviation is STX (was "SOT").
    '\x02' => ("STX", "Start of Text", ""),
    '\x03' => ("ETX", "End of Text", ""),
    '\x04' => ("EOT", "End of Transmission", ""),
    '\x05' => ("ENQ", "Enquiry", "Trigger a response at the receiving end, to see if it is still present."),
    '\x06' => ("ACK", "Acknowledge", "Indication of successful receipt of a message."),
    '\x07' => ("BEL", "Bell", "Call for attention from an operator."),
    # Standard abbreviation is BS (was "HBS").
    '\x08' => ("BS", "Backspace", "Move one position leftwards. Next character may overprint or replace the character that was there."),
    '\x09' => ("HT", "Horizontal Tab", "Move right to the next tab stop."),
    '\x0a' => ("LF", "Line Feed", "Move down to the same position on the next line (some devices also moved to the left column)."),
    '\x0b' => ("VT", "Vertical Tab", "Move down to the next vertical tab stop. "),
    '\x0c' => ("FF", "Form Feed", "Move down to the top of the next page. "),
    '\x0d' => ("CR", "Carriage Return", "Move to column zero while staying on the same line."),
    '\x0e' => ("SO", "Shift Out", "Switch to an alternative character set."),
    '\x0f' => ("SI", "Shift In", "Return to regular character set after SO."),
    '\x10' => ("DLE", "Data Link Escape", "Cause a limited number of contiguously following characters to be interpreted in some different way."),
    '\x11' => ("DC1", "Device Control One (XON)", "Used by teletype devices for the paper tape reader and tape punch. Became the de-facto standard for software flow control, now obsolete."),
    '\x12' => ("DC2", "Device Control Two", "Used by teletype devices for the paper tape reader and tape punch. Became the de-facto standard for software flow control, now obsolete."),
    '\x13' => ("DC3", "Device Control Three (XOFF)", "Used by teletype devices for the paper tape reader and tape punch. Became the de-facto standard for software flow control, now obsolete."),
    '\x14' => ("DC4", "Device Control Four", "Used by teletype devices for the paper tape reader and tape punch. Became the de-facto standard for software flow control, now obsolete."),
    '\x15' => ("NAK", "Negative Acknowledge", "Negative response to a sender, such as a detected error. "),
    '\x16' => ("SYN", "Synchronous Idle", "A transmission control character used by a synchronous transmission system in the absence of any other character (idle condition) to provide a signal from which synchronism may be achieved or retained between data terminal equipment."),
    '\x17' => ("ETB", "End of Transmission Block", "End of a transmission block of data when data are divided into such blocks for transmission purposes."),
    '\x18' => ("CAN", "Cancel", "A character, or the first character of a sequence, indicating that the data preceding it is in error. As a result, this data is to be ignored. The specific meaning of this character must be defined for each application and/or between sender and recipient."),
    '\x19' => ("EM", "End of Medium", "Indicates on paper or magnetic tapes that the end of the usable portion of the tape had been reached."),
    '\x1a' => ("SUB", "Substitute/Control-Z", "A control character used in the place of a character that has been found to be invalid or in error. SUB is intended to be introduced by automatic means."),
    '\x1b' => ("ESC", "Escape", "A control character which is used to provide additional control functions. It alters the meaning of a limited number of contiguously following bit combinations. The use of this character is specified in ISO-2022."),
    '\x1c' => ("FS", "File Separator", "Used to separate and qualify data logically; its specific meaning has to be specified for each application. If this character is used in hierarchical order, it delimits a data item called a file. "),
    '\x1d' => ("GS", "Group Separator", "Used to separate and qualify data logically; its specific meaning has to be specified for each application. If this character is used in hierarchical order, it delimits a data item called a group."),
    # Standard abbreviation is RS (was "RG").
    '\x1e' => ("RS", "Record Separator", "Used to separate and qualify data logically; its specific meaning has to be specified for each application. If this character is used in hierarchical order, it delimits a data item called a record."),
    '\x1f' => ("US", "Unit Separator", "Used to separate and qualify data logically; its specific meaning has to be specified for each application. If this character is used in hierarchical order, it delimits a data item called a unit."),
    '\x7f' => ("DEL", "Delete", "Originally used to delete characters on punched tape by punching out all the holes."),
)
"""
    elaboration(io::IO, char::Char)

Print a one- or two-line description of what `char` is to `io`.

The description is chosen by the first matching case:

- an entry of `CONTROL_CHARACTERS`: its name, abbreviation and (when
  non-empty) its description;
- any other ASCII character: its kind (letter, numeral, space, bracket or
  punctuation);
- an overlong or malformed character: nothing is printed;
- code points 128 to 255: "accented letter" when the character is a letter
  at or above code point 0xC0, otherwise "symbol", followed by the hex value;
- anything else: the Unicode category name and abbreviation.

Always returns `nothing`.
"""
function elaboration(io::IO, char::Char)
    stychr = styled"{julia_char:$(sprint(show, char))}"
    c0index = findfirst(c -> first(c) == char, CONTROL_CHARACTERS)
    if !isnothing(c0index)
        cshort, cname, cinfo = last(CONTROL_CHARACTERS[c0index])
        println(io, "\n Control character ", stychr, ": ", cname, " ($cshort)",
                ifelse(isempty(cinfo), "", "\n "), cinfo)
    elseif isascii(char)
        # Only ASCII characters reach this point, so non-ASCII brackets such
        # as guillemets can never match and are not listed here.
        kind = if char in 'a':'z'
            "lowercase letter"
        elseif char in 'A':'Z'
            "uppercase letter"
        elseif char in '0':'9'
            "numeral"
        elseif char == ' '
            "space"
        elseif char in ('(', ')', '[', ']', '{', '}')
            "parenthesis"
        elseif char in ('!':'/'..., ':':'@'..., '\\', '^', '_', '`', '|', '~')
            "punctuation"
        end
        println(io, "\n ASCII $kind ", stychr)
    elseif Base.isoverlong(char) || Base.ismalformed(char)
        # Deliberately print nothing. This guard must come before any call
        # to `codepoint`, which throws on malformed characters.
    elseif codepoint(char) in 128:255
        # Letters in this block start at 0xC0; `isletter` excludes the
        # multiplication and division signs that sit among them.
        if codepoint(char) >= 0xc0 && isletter(char)
            println(io, "\n Extended ASCII accented letter ", stychr,
                    styled" ({julia_number:0x$(string(UInt8(char), base=16))})")
        else
            println(io, "\n Extended ASCII symbol ", stychr,
                    styled" ({shadow:0x$(string(Int(char), base=16))})")
        end
    else
        catstr = Base.Unicode.category_string(char)
        catabr = Base.Unicode.category_abbrev(char)
        println(io, styled"\n Unicode $stychr, category: $catstr ($catabr)")
    end
    return nothing
end
end
end
# TODO struct