feat(unitproc/bkt-replace_contractions):Add Bash function
[BK-2020-03.git] / unitproc / bkt-replace_contractions
1 #!/bin/bash
2
3 # Function to replace contractions
replace_contractions() {
  # Desc: Replace ' with ʼ in a fixed list of English contractions
  # Note: In UTF-8 text, replaces U+0027 APOSTROPHE with
  #       U+02BC MODIFIER LETTER APOSTROPHE. Matching is
  #       case-insensitive and anchored on word boundaries. Only the
  #       apostrophe is rewritten; the letter case of the matched
  #       word is preserved (e.g. "DON'T" -> "DONʼT").
  # Usage: replace_contractions [FILE]
  # Input: arg1   file path (optional)
  #        stdin  read when arg1 is absent or is not a regular file
  # Output: stdout  transformed text
  #         stderr  error message on bad argument count
  # Return: 1 if more than one argument is given; otherwise the exit
  #         status of sed
  # Version: 0.0.2
  # Depends: bash; GNU sed 4.8 (the `I` flag and `\b` are GNU extensions)

  local -a sed_script

  # Check input
  if [[ "$#" -gt 1 ]]; then
    printf '%s\n' "FATAL:Incorrect argument count:$#" 1>&2
    return 1
  fi

  # One expression per contraction suffix. Group 1 is the stem and
  # group 2 the suffix; both are copied back verbatim so that only the
  # apostrophe between them changes. The surrounding \b anchors keep
  # the rules from firing inside longer words (e.g. the possessive
  # "show's" must not be treated as "how's").
  sed_script=(
    -e "s/\b(you|they|we)'(re)\b/\1ʼ\2/gI"
    -e "s/\b(i)'(m)\b/\1ʼ\2/gI"
    -e "s/\b(you|they|we|i)'(ve)\b/\1ʼ\2/gI"
    -e "s/\b(that|what|here|there|where|who|how)'(s)\b/\1ʼ\2/gI"
    -e "s/\b(doesn|don)'(t)\b/\1ʼ\2/gI"
    -e "s/\b(i|we|they)'(ll)\b/\1ʼ\2/gI"
  )

  # Perform substitutions
  # ${1-} keeps the test safe under `set -u` when no argument is given.
  if [[ -f "${1-}" ]]; then
    # Use specified file; `--` stops a path such as "-n" from being
    # parsed by sed as an option.
    sed -E "${sed_script[@]}" -- "$1"
  else
    # Use standard input
    sed -E "${sed_script[@]}"
  fi
} # replace ' with ʼ in contractions