Commit | Line | Data |
---|---|---|
265d3d9a SBS |
1 | #!/bin/bash |
2 | ||
function replace_apos() {
    # Desc: Replace ' with ʼ in English contractions and possessives
    # Note: In UTF-8 text, replaces U+0027 APOSTROPHE with
    #       U+02BC MODIFIER LETTER APOSTROPHE. Only apostrophes matched by
    #       one of the sed expressions below are replaced; any other
    #       apostrophe is passed through unchanged.
    # Input: stdin
    #        arg1  file path (optional). If arg1 is absent or is not a
    #              regular file, standard input is read instead.
    # Output: stdout
    # Return: 1 if more than one argument is supplied; otherwise the exit
    #         status of sed.
    # Version: 1.1.2 (BK-2020-03)
    # Depends: GNU sed 4.8 (uses -E and the \b word-boundary extension)

    # Keep the working variable out of the caller's namespace.
    local input;

    # Check input
    if [[ "$#" -gt 1 ]]; then
        printf '%s\n' "FATAL:Incorrect argument count:$#" 1>&2;
        return 1;
    fi;

    # "${1-}" keeps the test safe when no argument is given under `set -u`.
    if [[ -f "${1-}" ]]; then
        # Use specified file
        input="$1";
        # A relative name beginning with '-' would be parsed by sed as an
        # option; anchor it to the current directory instead.
        if [[ "$input" == -* ]]; then
            input="./$input";
        fi;
    else
        # Use standard input
        input="-";
    fi;

    # Perform substitutions
    ## Note: See https://en.wiktionary.org/wiki/Category:English_contractions
    ## Note: Order of replacements sorted most-specific first. A longer
    ##       contraction (e.g. mustn't've) must be listed before its shorter
    ##       prefix (mustn't); otherwise the shorter rule rewrites the first
    ##       apostrophe and the longer pattern can no longer match.
    sed -E \
        -e "s/(you|You|YOU)'(ren|REN|ven|VEN)'(t|T)/\1ʼ\2ʼ\3/g" \
        -e "s/(you|You|YOU)'(dn|DN)'(t|T)'(ve|VE)/\1ʼ\2ʼ\3ʼ\4/g" \
        -e "s/(you|You|YOU)'(d|D|ll|LL)'(ve|VE)/\1ʼ\2ʼ\3/g" \
        -e "s/(you|You|YOU)'(d|D|ll|LL|re|RE|ve|VE)/\1ʼ\2/g" \
        -e "s/(y|Y)'(all|ALL)'(dn|DN)'(t|T)'(ve|VE)/\1ʼ\2ʼ\3ʼ\4ʼ\5/g" \
        -e "s/(y|Y)'(all|ALL)'(d|D)'(nt|NT)'(ve|VE)/\1ʼ\2ʼ\3ʼ\4ʼ\5/g" \
        -e "s/(y|Y)'(all|ALL)'(d|D)'(ve|VE)/\1ʼ\2ʼ\3ʼ\4/g" \
        -e "s/(y|Y)'(all|ALL)'(d|D|ll|LL|re|RE|ve|VE)/\1ʼ\2ʼ\3/g" \
        -e "s/(y|Y)'(all|ALL)/\1ʼ\2/g" \
        -e "s/(y|Y)'(ain|AIN)'(t|T)/\1ʼ\2ʼ\3/g" \
        -e "s/(wouldn|Wouldn|WOULDN)'(t|T)'(ve|VE)/\1ʼ\2ʼ\3/g" \
        -e "s/(wouldn|Wouldn|WOULDN)'(t|T)/\1ʼ\2/g" \
        -e "s/(won|Won|WON)'(t|T)'(ve|VE)/\1ʼ\2ʼ\3/g" \
        -e "s/(won|Won|WON)'(t|T)/\1ʼ\2/g" \
        -e "s/(who|Who|WHO)'(d|D|ll|LL)'(ve|VE)/\1ʼ\2ʼ\3/g" \
        -e "s/(who|Who|WHO)'(d|D|ll|LL|re|RE|s|S|ve|VE)/\1ʼ\2/g" \
        -e "s/(where|Where|WHERE)'(d|D|s|S)/\1ʼ\2/g" \
        -e "s/(what|What|WHAT)'(ll|LL|re|RE|s|S|ve|VE)/\1ʼ\2/g" \
        -e "s/(weren|Weren|WEREN)'(t|T)/\1ʼ\2/g" \
        -e "s/(we|We|WE)'(ven|VEN)'(t|T)/\1ʼ\2ʼ\3/g" \
        -e "s/(we|We|WE)'(ren|REN)'(t|T)/\1ʼ\2ʼ\3/g" \
        -e "s/(we|We|WE)'(d|D|ll|LL)'(ve|VE)/\1ʼ\2ʼ\3/g" \
        -e "s/(we|We|WE)'(d|D|ll|LL|re|RE|ve|VE)/\1ʼ\2/g" \
        -e "s/(wasn|Wasn|WASN)'(t|T)/\1ʼ\2/g" \
        -e "s/(they|They|THEY)'(d|D|ll|LL)'(ve|VE)/\1ʼ\2ʼ\3/g" \
        -e "s/(they|They|THEY)'(d|D|ll|LL|re|RE|ve|VE)/\1ʼ\2/g" \
        -e "s/(there|There|THERE)'(d|D|ll|LL)'(ve|VE)/\1ʼ\2ʼ\3/g" \
        -e "s/(there|There|THERE)'(d|D|ll|LL|re|RE|s|S|ve|VE)/\1ʼ\2/g" \
        -e "s/(that|That|THAT)'(d|D|ll|LL)'(ve|VE)/\1ʼ\2ʼ\3/g" \
        -e "s/(that|That|THAT)'(d|D|ll|LL|s|S)/\1ʼ\2/g" \
        -e "s/(shouldn|Shouldn|SHOULDN)'(t|T)'(ve|VE)/\1ʼ\2ʼ\3/g" \
        -e "s/(shouldn|Shouldn|SHOULDN)'(t|T)/\1ʼ\2/g" \
        -e "s/(she|She|SHE)'(d|D|ll|LL)'(ve|VE)/\1ʼ\2ʼ\3/g" \
        -e "s/(she|She|SHE)'(d|D|ll|LL|s|S)/\1ʼ\2/g" \
        -e "s/(shan|Shan|SHAN)'(t|T)/\1ʼ\2/g" \
        -e "s/'(s|S)\b/ʼ\1/g" \
        -e "s/(s|S)'( |$)/\1ʼ\2/g" \
        -e "s/(oughtn|Oughtn|OUGHTN)'(t|T)'(ve|VE)/\1ʼ\2ʼ\3/g" \
        -e "s/(oughtn|Oughtn|OUGHTN)'(t|T)/\1ʼ\2/g" \
        -e "s/(o|O)'(clock|CLOCK)/\1ʼ\2/g" \
        -e "s/(mustn|Mustn|MUSTN)'(t|T)'(ve|VE)/\1ʼ\2ʼ\3/g" \
        -e "s/(mustn|Mustn|MUSTN)'(t|T)/\1ʼ\2/g" \
        -e "s/(mightn|Mightn|MIGHTN)'(t|T)'(ve|VE)/\1ʼ\2ʼ\3/g" \
        -e "s/(mightn|Mightn|MIGHTN)'(t|T)/\1ʼ\2/g" \
        -e "s/(might|Might|MIGHT)'(ve|VE)/\1ʼ\2/g" \
        -e "s/(let|Let|LET)'(s|S)/\1ʼ\2/g" \
        -e "s/(it|It|IT)'(d|D|ll|LL)'(ve|VE)/\1ʼ\2ʼ\3/g" \
        -e "s/(it|It|IT)'(d|D|ll|LL|s|S)/\1ʼ\2/g" \
        -e "s/(isn|Isn|ISN)'(t|T)/\1ʼ\2/g" \
        -e "s/(I|i)'(dn|DN)'(t|T)'(ve|VE)/\1ʼ\2ʼ\3ʼ\4/g" \
        -e "s/(I|i)'(d|D|ll|LL)'(ve|VE)/\1ʼ\2ʼ\3/g" \
        -e "s/(I|i)'(d|D|ll|LL|m|M|ve|VE)/\1ʼ\2/g" \
        -e "s/(how|How|HOW)'(d|D)/\1ʼ\2/g" \
        -e "s/(he|He|HE)'(d|D|ll|LL)'(ve|VE)/\1ʼ\2ʼ\3/g" \
        -e "s/(he|He|HE)'(d|D|ll|LL|s|S)/\1ʼ\2/g" \
        -e "s/(haven|Haven|HAVEN)'(t|T)/\1ʼ\2/g" \
        -e "s/(hasn|Hasn|HASN)'(t|T)/\1ʼ\2/g" \
        -e "s/(hadn|Hadn|HADN)'(t|T)'(ve|VE)/\1ʼ\2ʼ\3/g" \
        -e "s/(hadn|Hadn|HADN)'(t|T)/\1ʼ\2/g" \
        -e "s/'(em\b)/ʼ\1/g" \
        -e "s/(d|D)'(ya|YA|you|YOU)/\1ʼ\2/g" \
        -e "s/(don|Don|DON)'(t|T)/\1ʼ\2/g" \
        -e "s/(doesn|Doesn|DOESN)'(t|T)/\1ʼ\2/g" \
        -e "s/(didn|Didn|DIDN)'(t|T)/\1ʼ\2/g" \
        -e "s/(could|Could|COULD)'(ve|VE)/\1ʼ\2/g" \
        -e "s/(couldn|Couldn|COULDN)'(t|T)'(ve|VE)/\1ʼ\2ʼ\3/g" \
        -e "s/(couldn|Couldn|COULDN)'(t|T)/\1ʼ\2/g" \
        -e "s/(c|C)'(mere|MERE)/\1ʼ\2/g" \
        -e "s/(can|Can|CAN)'(t|T)'(ve|VE)/\1ʼ\2ʼ\3/g" \
        -e "s/(can|Can|CAN)'(t|T)/\1ʼ\2/g" \
        -e "s/'(cause|Cause|CAUSE)/ʼ\1/g" \
        -e "s/'(bout|Bout|BOUT)/ʼ\1/g" \
        -e "s/(aren|Aren|AREN)'(t|T)/\1ʼ\2/g" \
        -e "s/(ate|Ate|ATE)'(nt|NT)/\1ʼ\2/g" \
        -e "s/(ain|Ain|AIN)'(t|T)/\1ʼ\2/g" \
        "$input"
}; # replace ' with ʼ
4aa4ed1b SBS |
109 | |
110 | # Author: Steven Baltakatei Sandoval | |
111 | # License: GPLv3+ |