#!/bin/bash

# Function to replace contractions
replace_contractions() {
    # Desc: Replace ' with ʼ in common English contractions
    # Note: In contractions of UTF-8 text, replaces U+0027 APOSTROPHE
    #       with U+02BC MODIFIER LETTER APOSTROPHE. Matching is
    #       case-insensitive and anchored on word boundaries; the
    #       letters of every match are written back unchanged, so the
    #       apostrophe is the only character that differs in the output.
    # Input: stdin  (used when arg1 is absent, empty, or "-")
    #        arg1   path of a readable file to process instead of stdin
    # Output: stdout  converted text
    #         stderr  diagnostics
    # Return: 0 on success
    #         1 on wrong argument count or unreadable input path
    #         otherwise the exit status of sed
    # Version: 0.0.2
    # Depends: GNU sed 4.8 (-E, \b, and the I flag of the s command)
    local input;

    # Check input
    if [[ "$#" -gt 1 ]]; then
        echo "FATAL:Incorrect argument count:$#" 1>&2;
        return 1;
    fi;

    if [[ -z "${1:-}" || "${1:-}" == "-" ]]; then
        # Use standard input
        input="-";
    elif [[ -r "$1" && ! -d "$1" ]]; then
        # Use specified path. Anything readable that is not a directory is
        # accepted so that FIFOs and process substitutions work as well.
        input="$1";
    else
        # Fail loudly instead of silently falling back to stdin, which
        # would block on a terminal and hide a mistyped path.
        echo "FATAL:Not a readable file:$1" 1>&2;
        return 1;
    fi;

    # Perform substitutions.
    # Each expression captures the stem (\1) and the suffix (\2) and emits
    # them verbatim around U+02BC, which preserves the original letter case.
    # \b on both sides prevents matches inside longer words (for example
    # "how's" inside "show's"). The set of contractions is grouped by stem
    # and suffix; "you'll" is intentionally not part of the set.
    # "--" stops option parsing so a path starting with "-" is safe; a lone
    # "-" operand still means standard input to GNU sed.
    sed -E \
        -e "s/\b(you|they|we)'(re|ve)\b/\1ʼ\2/gI" \
        -e "s/\b(i)'(m|ve|ll)\b/\1ʼ\2/gI" \
        -e "s/\b(we|they)'(ll)\b/\1ʼ\2/gI" \
        -e "s/\b(that|what|here|there|where|who|how)'(s)\b/\1ʼ\2/gI" \
        -e "s/\b(doesn|don)'(t)\b/\1ʼ\2/gI" \
        -- "$input";
}; # replace ' with ʼ in contractions