]> zdv2.bktei.com Git - BK-2020-03.git/blob - unitproc/bkt-replace_apos
feat(unitproc/find_erotica.sh):Store script for finding erotica
[BK-2020-03.git] / unitproc / bkt-replace_apos
1 #!/bin/bash
2
function replace_apos() {
    # Desc: Replace ' and ’ with ʼ in English contractions and possessives
    # Usage: source bkt-replace_apos; replace_apos [FILE]
    # Note: In UTF-8 text, replaces U+0027 APOSTROPHE and ’ (RIGHT SINGLE
    #   QUOTATION MARK) with U+02BC MODIFIER LETTER APOSTROPHE wherever
    #   one of the patterns below matches. Quote marks that match no
    #   pattern (e.g. quotation marks around a phrase) are left as-is.
    # Note: Run under a UTF-8 locale. The bracket expressions contain the
    #   multibyte character ’, which sed treats as a single character
    #   only when the locale's character encoding is UTF-8.
    # Input: stdin (used when no argument is given or when arg1 is not
    #          an existing regular file)
    #        arg1  file path (optional)
    # Output: stdout  converted text
    #         stderr  error message on bad argument count
    # Return: 1 if more than one argument is given; otherwise the exit
    #         status of sed.
    # Version: 1.3.0 (BK-2020-03)
    # Depends: GNU sed 4.8

    # Keep working variables out of the caller's shell; this function is
    # meant to be sourced.
    local input;
    local -a sed_args;

    # Check input
    if [[ "$#" -gt 1 ]]; then
        printf '%s\n' "FATAL:Incorrect argument count:$#" 1>&2;
        return 1;
    fi;

    # "${1:-}" keeps the zero-argument call working under `set -u`.
    if [[ -f "${1:-}" ]]; then
        # Use specified file
        input="$1";
    else
        # Use standard input
        input="-";
    fi;

    # Perform substitutions
    ## Note: See https://en.wiktionary.org/wiki/Category:English_contractions
    ## Note: Order of replacements sorted most-specific first, because a
    ##   rule that has already rewritten an apostrophe hides it from the
    ##   longer rule that follows.
    sed_args=(
        # you, y'all
        -e "s/(you|You|YOU)['’](ren|REN|ven|VEN)['’](t|T)/\1ʼ\2ʼ\3/g"
        -e "s/(you|You|YOU)['’](dn|DN)['’](t|T)['’](ve|VE)/\1ʼ\2ʼ\3ʼ\4/g"
        -e "s/(you|You|YOU)['’](d|D|ll|LL)['’](ve|VE)/\1ʼ\2ʼ\3/g"
        -e "s/(you|You|YOU)['’](d|D|ll|LL|re|RE|ve|VE)/\1ʼ\2/g"
        -e "s/(y|Y)['’](all|ALL)['’](dn|DN)['’](t|T)['’](ve|VE)/\1ʼ\2ʼ\3ʼ\4ʼ\5/g"
        -e "s/(y|Y)['’](all|ALL)['’](d|D)['’](nt|NT)['’](ve|VE)/\1ʼ\2ʼ\3ʼ\4ʼ\5/g"
        -e "s/(y|Y)['’](all|ALL)['’](d|D)['’](ve|VE)/\1ʼ\2ʼ\3ʼ\4/g"
        -e "s/(y|Y)['’](all|ALL)['’](d|D|ll|LL|re|RE|ve|VE)/\1ʼ\2ʼ\3/g"
        -e "s/(y|Y)['’](all|ALL)/\1ʼ\2/g"
        -e "s/(y|Y)['’](ain|AIN)['’](t|T)/\1ʼ\2ʼ\3/g"
        # w-
        -e "s/(wouldn|Wouldn|WOULDN)['’](t|T)['’](ve|VE)/\1ʼ\2ʼ\3/g"
        -e "s/(wouldn|Wouldn|WOULDN)['’](t|T)/\1ʼ\2/g"
        -e "s/(won|Won|WON)['’](t|T)['’](ve|VE)/\1ʼ\2ʼ\3/g"
        -e "s/(won|Won|WON)['’](t|T)/\1ʼ\2/g"
        -e "s/(who|Who|WHO)['’](d|D|ll|LL)['’](ve|VE)/\1ʼ\2ʼ\3/g"
        -e "s/(who|Who|WHO)['’](d|D|ll|LL|re|RE|s|S|ve|VE)/\1ʼ\2/g"
        -e "s/(where|Where|WHERE)['’](d|D|s|S)/\1ʼ\2/g"
        -e "s/(what|What|WHAT)['’](ll|LL|re|RE|s|S|ve|VE)/\1ʼ\2/g"
        -e "s/(weren|Weren|WEREN)['’](t|T)/\1ʼ\2/g"
        -e "s/(we|We|WE)['’](ven|VEN)['’](t|T)/\1ʼ\2ʼ\3/g"
        -e "s/(we|We|WE)['’](ren|REN)['’](t|T)/\1ʼ\2ʼ\3/g"
        -e "s/(we|We|WE)['’](d|D|ll|LL)['’](ve|VE)/\1ʼ\2ʼ\3/g"
        -e "s/(we|We|WE)['’](d|D|ll|LL|re|RE|ve|VE)/\1ʼ\2/g"
        -e "s/(wasn|Wasn|WASN)['’](t|T)/\1ʼ\2/g"
        # t-
        -e "s/(they|They|THEY)['’](d|D|ll|LL)['’](ve|VE)/\1ʼ\2ʼ\3/g"
        -e "s/(they|They|THEY)['’](d|D|ll|LL|re|RE|ve|VE)/\1ʼ\2/g"
        -e "s/(there|There|THERE)['’](ll|LL)['’](ve|VE)/\1ʼ\2ʼ\3/g"
        -e "s/(there|There|THERE)['’](s|S|ve|VE)/\1ʼ\2/g"
        -e "s/(that|That|THAT)['’](d|D|ll|LL)['’](ve|VE)/\1ʼ\2ʼ\3/g"
        -e "s/(that|That|THAT)['’](d|D|ll|LL|s|S)/\1ʼ\2/g"
        # s-
        -e "s/(shouldn|Shouldn|SHOULDN)['’](t|T)['’](ve|VE)/\1ʼ\2ʼ\3/g"
        -e "s/(shouldn|Shouldn|SHOULDN)['’](t|T)/\1ʼ\2/g"
        -e "s/(she|She|SHE)['’](d|D|ll|LL)['’](ve|VE)/\1ʼ\2ʼ\3/g"
        -e "s/(she|She|SHE)['’](d|D|ll|LL|s|S)/\1ʼ\2/g"
        -e "s/(shan|Shan|SHAN)['’](t|T)/\1ʼ\2/g"
        # Possessive / contraction 's at the end of a word.
        -e "s/['’](s|S)\b/ʼ\1/g"
        # Plural possessive: s' followed by space, comma, period or end of
        # line. The period is escaped so that it is a literal "." and not
        # the any-character wildcard (which would also rewrite closing
        # quotation marks such as the one in: 'yes'!).
        -e "s/(s|S)['’]( |,|\.|$)/\1ʼ\2/g"
        # o-
        -e "s/(oughtn|Oughtn|OUGHTN)['’](t|T)['’](ve|VE)/\1ʼ\2ʼ\3/g"
        -e "s/(oughtn|Oughtn|OUGHTN)['’](t|T)/\1ʼ\2/g"
        -e "s/(o|O)['’](clock|CLOCK)/\1ʼ\2/g"
        # m-
        -e "s/(mustn|Mustn|MUSTN)['’](t|T)['’](ve|VE)/\1ʼ\2ʼ\3/g"
        -e "s/(mustn|Mustn|MUSTN)['’](t|T)/\1ʼ\2/g"
        -e "s/(mightn|Mightn|MIGHTN)['’](t|T)['’](ve|VE)/\1ʼ\2ʼ\3/g"
        -e "s/(mightn|Mightn|MIGHTN)['’](t|T)/\1ʼ\2/g"
        -e "s/(might|Might|MIGHT)['’](ve|VE)/\1ʼ\2/g"
        # l-, i-
        -e "s/(let|Let|LET)['’](s|S)/\1ʼ\2/g"
        -e "s/(it|It|IT)['’](d|D|ll|LL)['’](ve|VE)/\1ʼ\2ʼ\3/g"
        -e "s/(it|It|IT)['’](d|D|ll|LL|s|S)/\1ʼ\2/g"
        -e "s/(isn|Isn|ISN)['’](t|T)/\1ʼ\2/g"
        -e "s/(I|i)['’](dn|DN)['’](t|T)['’](ve|VE)/\1ʼ\2ʼ\3ʼ\4/g"
        -e "s/(I|i)['’](d|D|ll|LL)['’](ve|VE)/\1ʼ\2ʼ\3/g"
        -e "s/(I|i)['’](d|D|ll|LL|m|M|ve|VE)/\1ʼ\2/g"
        # h-
        -e "s/(how|How|HOW)['’](d|D)/\1ʼ\2/g"
        -e "s/(he|He|HE)['’](d|D|ll|LL)['’](ve|VE)/\1ʼ\2ʼ\3/g"
        -e "s/(he|He|HE)['’](d|D|ll|LL|s|S)/\1ʼ\2/g"
        -e "s/(haven|Haven|HAVEN)['’](t|T)/\1ʼ\2/g"
        -e "s/(hasn|Hasn|HASN)['’](t|T)/\1ʼ\2/g"
        -e "s/(hadn|Hadn|HADN)['’](t|T)['’](ve|VE)/\1ʼ\2ʼ\3/g"
        -e "s/(hadn|Hadn|HADN)['’](t|T)/\1ʼ\2/g"
        # e-, d-
        -e "s/['’](em\b)/ʼ\1/g"
        -e "s/(d|D)['’](ya|YA|you|YOU)/\1ʼ\2/g"
        -e "s/([[:alnum:]])['’](d|D)/\1ʼ\2/g"
        -e "s/(don|Don|DON)['’](t|T)/\1ʼ\2/g"
        -e "s/(doesn|Doesn|DOESN)['’](t|T)/\1ʼ\2/g"
        -e "s/(didn|Didn|DIDN)['’](t|T)/\1ʼ\2/g"
        # c-, b-
        -e "s/(could|Could|COULD)['’](ve|VE)/\1ʼ\2/g"
        -e "s/(couldn|Couldn|COULDN)['’](t|T)['’](ve|VE)/\1ʼ\2ʼ\3/g"
        -e "s/(couldn|Couldn|COULDN)['’](t|T)/\1ʼ\2/g"
        -e "s/(c|C)['’](mere|MERE)/\1ʼ\2/g"
        -e "s/(can|Can|CAN)['’](t|T)['’](ve|VE)/\1ʼ\2ʼ\3/g"
        -e "s/(can|Can|CAN)['’](t|T)/\1ʼ\2/g"
        -e "s/['’](cause|Cause|CAUSE)/ʼ\1/g"
        -e "s/['’](bout|Bout|BOUT)/ʼ\1/g"
        # a-
        -e "s/(aren|Aren|AREN)['’](t|T)/\1ʼ\2/g"
        -e "s/(ate|Ate|ATE)['’](nt|NT)/\1ʼ\2/g"
        -e "s/(ain|Ain|AIN)['’](t|T)/\1ʼ\2/g"
        # Catch-all: any apostrophe directly between two letters.
        -e "s/([[:alpha:]])['’]([[:alpha:]])/\1ʼ\2/g"
    );

    # "--" ends option parsing so a file name beginning with "-" is not
    # taken as a sed option; a lone "-" operand still means stdin.
    sed -E "${sed_args[@]}" -- "$input";
}; # replace ' with ʼ
112
113 # Author: Steven Baltakatei Sandoval
114 # License: GPLv3+