Commit | Line | Data |
---|---|---|
265d3d9a SBS |
1 | #!/bin/bash |
2 | ||
# Function to replace contractions
replace_contractions() {
  # Desc: Replace ' with ʼ in a fixed list of English contractions
  # Note: In contractions of UTF-8 text, replaces U+0027 APOSTROPHE
  #       with U+02BC MODIFIER LETTER APOSTROPHE. Only the apostrophe
  #       is changed; the letter case of the matched word is kept
  #       (e.g. "You're" -> "Youʼre", "DON'T" -> "DONʼT").
  #       Matching is case-insensitive and not anchored to word
  #       boundaries.
  # Input:  stdin  used when arg1 is absent, empty, or "-"
  #         arg1   path of a readable file to process (optional)
  # Output: stdout converted text
  #         stderr diagnostic on bad usage
  # Return: 1 on wrong argument count or unreadable input path;
  #         otherwise the exit status of sed
  # Version: 0.0.2
  # Depends: GNU sed 4.8 (-E, and the `I` flag of the s command)
  local src input;

  # Check input
  if [[ "$#" -gt 1 ]]; then
    echo "FATAL:Incorrect argument count:$#" 1>&2;
    return 1;
  fi;

  src="${1:-}";
  if [[ -z "$src" || "$src" == "-" ]]; then
    # Use standard input
    input="-";
  elif [[ -r "$src" && ! -d "$src" ]]; then
    # Use specified file. Readability (not -f) is tested so that FIFOs
    # and process substitutions are accepted as well as regular files.
    input="$src";
  else
    # Fail loudly instead of silently falling back to stdin, which
    # would block on a terminal when the path is simply mistyped.
    echo "FATAL:Cannot read input file:$src" 1>&2;
    return 1;
  fi;

  # Perform substitutions.
  # Each rule captures the letters on both sides of the apostrophe and
  # writes them back unchanged (\1, \2), so the `I` flag widens what is
  # matched without altering the case of the output. Because only the
  # apostrophe changes, overlapping words (here's / there's / where's)
  # give the same result whichever alternative matches.
  # `--` stops option parsing so a file name starting with "-" is not
  # mistaken for a sed option.
  sed -E \
    -e "s/(you|they|we)'(re)/\1ʼ\2/gI" \
    -e "s/(i)'(m)/\1ʼ\2/gI" \
    -e "s/(you|they|we|i)'(ve)/\1ʼ\2/gI" \
    -e "s/(that|what|here|there|where|who|how)'(s)/\1ʼ\2/gI" \
    -e "s/(doesn|don)'(t)/\1ʼ\2/gI" \
    -e "s/(i|we|they)'(ll)/\1ʼ\2/gI" \
    -- "$input";
}; # replace ' with ʼ in contractions