summary | refs | log | tree | commit | diff
path: root/capitalise
diff options
context:
space:
mode:
author David Wührer <def@gmx.at> 2023-05-13 17:15:33 +0200
committer David Wührer <def@gmx.at> 2023-05-13 17:15:33 +0200
commit 9bd7b1078f0d23bd452110451d1762f118e99fbf (patch)
tree 91791ba13253d28039564cd555a37d0628fd4ea6 /capitalise
parent dc3b063ef6b370d1b3594ed785c682ea2db09b41 (diff)
improvements
Diffstat (limited to 'capitalise')
-rwxr-xr-x capitalise 295
1 files changed, 25 insertions, 270 deletions
diff --git a/capitalise b/capitalise
index 4d0dfa1..479c55d 100755
--- a/capitalise
+++ b/capitalise
@@ -1,8 +1,6 @@
#!/bin/bash
-# For bold and italic we can use the Mathematical font variant mappings,
-# although those are only defined for Latin and Greek.
# Latin letter small capital a: UTF-8: e1 b4 80 UTF-16BE: 1d00
# .0 .1 .2 .3 .4 .5 .6 .7 .8 .9 .A .B .C .D .E .F
# 1D0. ᴀ ᴁ ᴂ ᴃ ᴄ ᴅ ᴆ ᴇ ᴈ ᴉ ᴊ ᴋ ᴌ ᴍ ᴎ ᴏ
@@ -17,33 +15,36 @@
# 1D9. ᶐ ᶑ ᶒ ᶓ ᶔ ᶕ ᶖ ᶗ ᶘ ᶙ ᶚ ᶛ ᶜ ᶝ ᶞ ᶟ
# 1DA. ᶠ ᶡ ᶢ ᶣ ᶤ ᶥ ᶦ ᶧ ᶨ ᶩ ᶪ ᶫ ᶬ ᶭ ᶮ ᶯ
# 1DB. ᶰ ᶱ ᶲ ᶳ ᶴ ᶵ ᶶ ᶷ ᶸ ᶹ ᶺ ᶻ ᶼ ᶽ ᶾ ᶿ
-declare -a smallcaps=(
+# TODO: Greek letter small capital
+declare -A smallcaps=(
[a]=$'\xe1\xb4\x80'
-# [b]=$'\xe1\xb4\x83'
+ [æ]=$'\xe1\xb4\x81'
+ [b]=$'\xca\x99'
[c]=$'\xe1\xb4\x84'
-# [d]=$'\xe1\xb4\x85'
-# [e]=$'\xe1\xb4\x86'
-# [f]=$'\xe1\xb4\x87'
-# [g]=$'\xe1\xb4\x88'
-# [h]=$'\xe1\xb4\x89'
+ [d]=$'\xe1\xb4\x85'
+ [e]=$'\xe1\xb4\x87'
+ [f]=$'\xea\x9c\xb0'
+ [g]=$'\xc9\xa2'
+ [h]=$'\xca\x9c'
[i]=$'\xc9\xaa'
-# [j]=$'\xe1\xb4\x8b'
-# [k]=$'\xe1\xb4\x8c'
+ [j]=$'\xe1\xb4\x8a'
+ [k]=$'\xe1\xb4\x8b'
[l]=$'\xca\x9f'
[m]=$'\xe1\xb4\x8d'
[n]=$'\xc9\xb4'
[o]=$'\xe1\xb4\x8f'
+ [œ]=$'\xc9\xb6'
[p]=$'\xe1\xb4\x98'
-# [q]=$'\xe1\xb4\x99'
-# [r]=$'\xe1\xb4\x9a'
-# [s]=$'\xe1\xb4\x9b'
+ [q]=$'\xea\x9e\xaf'
+ [r]=$'\xca\x80'
+ [s]=$'\xea\x9c\xb1'
[t]=$'\xe1\xb4\x9b'
-# [u]=$'\xe1\xb4\x95'
-# [v]=$'\xe1\xb4\x96'
-# [w]=$'\xe1\xb4\x97'
-# [x]=$'\xe1\xb4\x98'
-# [y]=$'\xe1\xb4\x99'
-# [z]=$'\xe1\xb4\x9a'
+ [u]=$'\xe1\xb4\x9c'
+ [v]=$'\xe1\xb4\xa0'
+ [w]=$'\xe1\xb4\xa1'
+ [x]=x # Missing
+ [y]=$'\xca\x8f'
+ [z]=$'\xe1\xb4\xa2'
# []=$'\x1d\x'
# []=$'\x1d\x'
# []=$'\x1d\x'
@@ -119,253 +120,6 @@ declare -a smallcaps=(
# []=$'\x1d\x'
# []=$'\x1d\x'
)
-# Latin bold capital: u1d400 - u1d419
-# Latin bold small: u1d41a - u1d433
-# Latin italic capital: u1d434 - u1d44d
-# Latin italic small: u1d44e - u1d467
-# Latin bold italic capital: u1d468 - u1d481
-# Latin bold italic small: u1d482 - u1d49b
-# (Script capital, script small, bold script capital, bold script small)
-# (Fraktur capital, fraktur small)
-# (Double-struck capital, double-struck small)
-# (some Hebrew symbol variants)
-# (Bold fraktur capital, bold fraktur small)
-# (Sans-serif capital, sans-serif small)
-# (Sans-serif bold capital, sans-serif bold small)
-# (Sans-serif italic capital, sans-serif italic small)
-# (Sans-serif bold italic capital, sans-serif bold italic small)
-# (Monospace capital, monospace small) U+1D670 MATHEMATICAL MONOSPACE CAPITAL A
-# Greek bold capital: u1d6a8 - u1d6c0
-# Greek bold small: u1d6c2 - u1d6da
-# Greek italic capital: u1d6e2 - u1d6fa
-# Greek italic small: u1dcfc - u1d714
-# Greek bold italic capital: u1d71c - u1d734
-# Greek bold italic small: u1d736 - u1d74e
-# (Greek sans-serif bold capital: u1d756 - u1d76e)
-# (Greek sans-serif bold small: u1d770 - u1d788)
-# (Greek sans-serif bold italic capital: u1d790 - u1d7a8)
-# (Greek sans-serif bold italic small: u1d7aa - u1d7c2)
-# bold digits: u1d7ce - u1d7d7
-# (double-struck digits)
-# (sans-serif digits)
-# (sans-serif bold digits)
-# (monospace digits)
-# (Arabic mathematical)
-# (Arabic stretched)
-# (Arabic looped)
-# (Arabic double-struck)
-#
-# It turns out there are no Unicode control codes for marking text
-# bold or italic. There are for subscript and superscript,
-# and ASCII has similar codes which nobody ever uses.
-
-declare -A bold=(
- [A]=$'\xf0\x9d\x90\x80'
- [B]=$'\xf0\x9d\x90\x81'
- [C]=$'\xf0\x9d\x90\x82'
- [D]=$'\xf0\x9d\x90\x83'
- [E]=$'\xf0\x9d\x90\x84'
- [F]=$'\xf0\x9d\x90\x85'
- [G]=$'\xf0\x9d\x90\x86'
- [H]=$'\xf0\x9d\x90\x87'
- [I]=$'\xf0\x9d\x90\x88'
- [J]=$'\xf0\x9d\x90\x89'
- [K]=$'\xf0\x9d\x90\x8a'
- [L]=$'\xf0\x9d\x90\x8b'
- [M]=$'\xf0\x9d\x90\x8c'
- [N]=$'\xf0\x9d\x90\x8d'
- [O]=$'\xf0\x9d\x90\x8e'
- [P]=$'\xf0\x9d\x90\x8f'
- [Q]=$'\xf0\x9d\x90\x90'
- [R]=$'\xf0\x9d\x90\x91'
- [S]=$'\xf0\x9d\x90\x92'
- [T]=$'\xf0\x9d\x90\x93'
- [U]=$'\xf0\x9d\x90\x94'
- [V]=$'\xf0\x9d\x90\x95'
- [W]=$'\xf0\x9d\x90\x96'
- [X]=$'\xf0\x9d\x90\x97'
- [Y]=$'\xf0\x9d\x90\x98'
- [Z]=$'\xf0\x9d\x90\x99'
- [a]=$'\xf0\x9d\x90\x9a'
- [b]=$'\xf0\x9d\x90\x9b'
- [c]=$'\xf0\x9d\x90\x9c'
- [d]=$'\xf0\x9d\x90\x9d'
- [e]=$'\xf0\x9d\x90\x9e'
- [f]=$'\xf0\x9d\x90\x9f'
- [g]=$'\xf0\x9d\x90\xa0'
- [h]=$'\xf0\x9d\x90\xa1'
- [i]=$'\xf0\x9d\x90\xa2'
- [j]=$'\xf0\x9d\x90\xa3'
- [k]=$'\xf0\x9d\x90\xa4'
- [l]=$'\xf0\x9d\x90\xa5'
- [m]=$'\xf0\x9d\x90\xa6'
- [n]=$'\xf0\x9d\x90\xa7'
- [o]=$'\xf0\x9d\x90\xa8'
- [p]=$'\xf0\x9d\x90\xa9'
- [q]=$'\xf0\x9d\x90\xaa'
- [r]=$'\xf0\x9d\x90\xab'
- [s]=$'\xf0\x9d\x90\xac'
- [t]=$'\xf0\x9d\x90\xad'
- [u]=$'\xf0\x9d\x90\xae'
- [v]=$'\xf0\x9d\x90\xaf'
- [w]=$'\xf0\x9d\x90\xb0'
- [x]=$'\xf0\x9d\x90\xb1'
- [y]=$'\xf0\x9d\x90\xb2'
- [z]=$'\xf0\x9d\x90\xb3'
- [Α]=$'\xf0\x9d\x9a\xa8'
- [Β]=$'\xf0\x9d\x9a\xa9'
- [Γ]=$'\xf0\x9d\x9a\xaa'
- [Δ]=$'\xf0\x9d\x9a\xab'
- [Ε]=$'\xf0\x9d\x9a\xac'
- [Ζ]=$'\xf0\x9d\x9a\xad'
- [Η]=$'\xf0\x9d\x9a\xae'
- [Θ]=$'\xf0\x9d\x9a\xaf'
- [Ι]=$'\xf0\x9d\x9a\xb0'
- [Κ]=$'\xf0\x9d\x9a\xb1'
- [Λ]=$'\xf0\x9d\x9a\xb2'
- [Μ]=$'\xf0\x9d\x9a\xb3'
- [Ν]=$'\xf0\x9d\x9a\xb4'
- [Ξ]=$'\xf0\x9d\x9a\xb5'
- [Ο]=$'\xf0\x9d\x9a\xb6'
- [Π]=$'\xf0\x9d\x9a\xb7'
- [Ρ]=$'\xf0\x9d\x9a\xb8'
- [Σ]=$'\xf0\x9d\x9a\xba'
- [Τ]=$'\xf0\x9d\x9a\xbb'
- [Υ]=$'\xf0\x9d\x9a\xbc'
- [Φ]=$'\xf0\x9d\x9a\xbd'
- [Χ]=$'\xf0\x9d\x9a\xbe'
- [Ψ]=$'\xf0\x9d\x9a\xbf'
- [Ω]=$'\xf0\x9d\x9b\x80'
- [α]=$'\xf0\x9d\x9b\x82'
- [β]=$'\xf0\x9d\x9b\x83'
- [γ]=$'\xf0\x9d\x9b\x84'
- [δ]=$'\xf0\x9d\x9b\x85'
- [ε]=$'\xf0\x9d\x9b\x86'
- [ζ]=$'\xf0\x9d\x9b\x87'
- [η]=$'\xf0\x9d\x9b\x88'
- [θ]=$'\xf0\x9d\x9b\x89'
- [ι]=$'\xf0\x9d\x9b\x8a'
- [κ]=$'\xf0\x9d\x9b\x8b'
- [λ]=$'\xf0\x9d\x9b\x8c'
- [μ]=$'\xf0\x9d\x9b\x8d'
- [ν]=$'\xf0\x9d\x9b\x8e'
- [ξ]=$'\xf0\x9d\x9b\x8f'
- [ο]=$'\xf0\x9d\x9b\x90'
- [π]=$'\xf0\x9d\x9b\x91'
- [ρ]=$'\xf0\x9d\x9b\x92'
- [ς]=$'\xf0\x9d\x9b\x93'
- [σ]=$'\xf0\x9d\x9b\x94'
- [τ]=$'\xf0\x9d\x9b\x95'
- [υ]=$'\xf0\x9d\x9b\x96'
- [φ]=$'\xf0\x9d\x9b\x97'
- [χ]=$'\xf0\x9d\x9b\x98'
- [ψ]=$'\xf0\x9d\x9b\x99'
- [ω]=$'\xf0\x9d\x9b\x9a'
-)
-
-declare -A italic=(
- [A]=$'\xf0\x9d\x90\xb4'
- [B]=$'\xf0\x9d\x90\xb5'
- [C]=$'\xf0\x9d\x90\xb6'
- [D]=$'\xf0\x9d\x90\xb7'
- [E]=$'\xf0\x9d\x90\xb8'
- [F]=$'\xf0\x9d\x90\xb9'
- [G]=$'\xf0\x9d\x90\xba'
- [H]=$'\xf0\x9d\x90\xbb'
- [I]=$'\xf0\x9d\x90\xbc'
- [J]=$'\xf0\x9d\x90\xbd'
- [K]=$'\xf0\x9d\x90\xbe'
- [L]=$'\xf0\x9d\x90\xbf'
- [M]=$'\xf0\x9d\x91\x80'
- [N]=$'\xf0\x9d\x91\x81'
- [O]=$'\xf0\x9d\x91\x82'
- [P]=$'\xf0\x9d\x91\x83'
- [Q]=$'\xf0\x9d\x91\x84'
- [R]=$'\xf0\x9d\x91\x85'
- [S]=$'\xf0\x9d\x91\x86'
- [T]=$'\xf0\x9d\x91\x87'
- [U]=$'\xf0\x9d\x91\x88'
- [V]=$'\xf0\x9d\x91\x89'
- [W]=$'\xf0\x9d\x91\x8a'
- [X]=$'\xf0\x9d\x91\x8b'
- [Y]=$'\xf0\x9d\x91\x8c'
- [Z]=$'\xf0\x9d\x91\x8d'
- [a]=$'\xf0\x9d\x91\x8e'
- [b]=$'\xf0\x9d\x91\x8f'
- [c]=$'\xf0\x9d\x91\x90'
- [d]=$'\xf0\x9d\x91\x91'
- [e]=$'\xf0\x9d\x91\x92'
- [f]=$'\xf0\x9d\x91\x93'
- [g]=$'\xf0\x9d\x91\x94'
- [h]=$'\xf0\x9d\x98\xa9' # Sans-serif, because f0 9d 91 95 is not assigned.
- [i]=$'\xf0\x9d\x91\x96'
- [j]=$'\xf0\x9d\x91\x97'
- [k]=$'\xf0\x9d\x91\x98'
- [l]=$'\xf0\x9d\x91\x99'
- [m]=$'\xf0\x9d\x91\x9a'
- [n]=$'\xf0\x9d\x91\x9b'
- [o]=$'\xf0\x9d\x91\x9c'
- [p]=$'\xf0\x9d\x91\x9d'
- [q]=$'\xf0\x9d\x91\x9e'
- [r]=$'\xf0\x9d\x91\x9f'
- [s]=$'\xf0\x9d\x91\xa0'
- [t]=$'\xf0\x9d\x91\xa1'
- [u]=$'\xf0\x9d\x91\xa2'
- [v]=$'\xf0\x9d\x91\xa3'
- [w]=$'\xf0\x9d\x91\xa4'
- [x]=$'\xf0\x9d\x91\xa5'
- [y]=$'\xf0\x9d\x91\xa6'
- [z]=$'\xf0\x9d\x91\xa7'
- [Α]=$'\xf0\x9d\x9b\xa2'
- [Β]=$'\xf0\x9d\x9b\xa3'
- [Γ]=$'\xf0\x9d\x9b\xa4'
- [Δ]=$'\xf0\x9d\x9b\xa5'
- [Ε]=$'\xf0\x9d\x9b\xa6'
- [Ζ]=$'\xf0\x9d\x9b\xa7'
- [Η]=$'\xf0\x9d\x9b\xa8'
- [Θ]=$'\xf0\x9d\x9b\xa9'
- [Ι]=$'\xf0\x9d\x9b\xaa'
- [Κ]=$'\xf0\x9d\x9b\xab'
- [Λ]=$'\xf0\x9d\x9b\xac'
- [Μ]=$'\xf0\x9d\x9b\xad'
- [Ν]=$'\xf0\x9d\x9b\xae'
- [Ξ]=$'\xf0\x9d\x9b\xaf'
- [Ο]=$'\xf0\x9d\x9b\xb0'
- [Π]=$'\xf0\x9d\x9b\xb1'
- [Ρ]=$'\xf0\x9d\x9b\xb2'
- [Σ]=$'\xf0\x9d\x9b\xb4'
- [Τ]=$'\xf0\x9d\x9b\xb5'
- [Υ]=$'\xf0\x9d\x9b\xb6'
- [Φ]=$'\xf0\x9d\x9b\xb7'
- [Χ]=$'\xf0\x9d\x9b\xb8'
- [Ψ]=$'\xf0\x9d\x9b\xb9'
- [Ω]=$'\xf0\x9d\x9b\xba'
- [α]=$'\xf0\x9d\x9b\xbc'
- [β]=$'\xf0\x9d\x9b\xbd'
- [γ]=$'\xf0\x9d\x9b\xbe'
- [δ]=$'\xf0\x9d\x9b\xbf'
- [ε]=$'\xf0\x9d\x9c\x80'
- [ζ]=$'\xf0\x9d\x9c\x81'
- [η]=$'\xf0\x9d\x9c\x82'
- [θ]=$'\xf0\x9d\x9c\x83'
- [ι]=$'\xf0\x9d\x9c\x84'
- [κ]=$'\xf0\x9d\x9c\x85'
- [λ]=$'\xf0\x9d\x9c\x86'
- [μ]=$'\xf0\x9d\x9c\x87'
- [ν]=$'\xf0\x9d\x9c\x88'
- [ξ]=$'\xf0\x9d\x9c\x89'
- [ο]=$'\xf0\x9d\x9c\x8a'
- [π]=$'\xf0\x9d\x9c\x8b'
- [ρ]=$'\xf0\x9d\x9c\x8c'
- [ς]=$'\xf0\x9d\x9c\x8d'
- [σ]=$'\xf0\x9d\x9c\x8e'
- [τ]=$'\xf0\x9d\x9c\x8f'
- [υ]=$'\xf0\x9d\x9c\x90'
- [φ]=$'\xf0\x9d\x9c\x91'
- [χ]=$'\xf0\x9d\x9c\x92'
- [ψ]=$'\xf0\x9d\x9c\x93'
- [ω]=$'\xf0\x9d\x9c\x94'
-)
while IFS= read -r line
do
@@ -373,9 +127,10 @@ do
for i in $(seq 0 "$((${#line}-1))")
do
c1=${line:$i:1}
- if [[ $c1 =~ [a-zA-Zα-ωΑ-Ω] ]]
- then replacement+=${italic[$c1]}
- else replacement+=$'\u0331'$c1
+ # if [[ $c1 =~ [a-zA-Zα-ωΑ-Ω] ]]
+ if [[ $c1 =~ [a-zæœ] ]]
+ then replacement+=${smallcaps[$c1]}
+ else replacement+=$c1
fi
done
echo "$replacement"