+++ /dev/null
-#!/bin/bash
-
-# Function to replace contractions
replace_contractions() {
  # Desc: Replace ' with ʼ in contractions
  # Note: In contractions of UTF-8 text, replaces U+0027 APOSTROPHE
  #       with U+02BC MODIFIER LETTER APOSTROPHE. Only the contractions
  #       listed below are touched; each is matched in lower-case,
  #       Capitalized and UPPER-CASE spellings of its parts.
  # Input: stdin (used when arg1 is absent or is not a regular file)
  #        arg1  file path (optional)
  # Output: stdout  converted text
  #         stderr  diagnostic when more than one argument is given
  # Return: 1 on bad argument count, otherwise the exit status of sed
  # Version: 0.1.2 (BK-2020-03)
  # Depends: GNU sed 4.8

  # Keep working state out of the caller's scope.
  local -a src=()

  # Check input
  if [[ "$#" -gt 1 ]]; then
    printf '%s\n' "FATAL:Incorrect argument count:$#" 1>&2
    return 1
  fi

  if [[ -f "$1" ]]; then
    # Use specified file. The leading `--` stops sed from parsing a
    # file name that begins with `-` as an option.
    src=(-- "$1")
  fi
  # With no file operand sed reads standard input.

  # Perform substitutions
  ## Note: See https://en.wiktionary.org/wiki/Category:English_contractions
  ## Note: Order of replacements sorted most-specific first. A shorter
  ##       rule that runs before a longer one rewrites the first
  ##       apostrophe and prevents the longer rule from ever matching,
  ##       so every "...'ve" compound must precede its base form.
  sed -E \
    -e "s/(you|You|YOU)'(ren|REN|ven|VEN)'(t|T)/\1ʼ\2ʼ\3/g" \
    -e "s/(you|You|YOU)'(dn|DN)'(t|T)'(ve|VE)/\1ʼ\2ʼ\3ʼ\4/g" \
    -e "s/(you|You|YOU)'(d|D|ll|LL)'(ve|VE)/\1ʼ\2ʼ\3/g" \
    -e "s/(you|You|YOU)'(d|D|ll|LL|re|RE|ve|VE)/\1ʼ\2/g" \
    -e "s/(y|Y)'(all|ALL)'(dn|DN)'(t|T)'(ve|VE)/\1ʼ\2ʼ\3ʼ\4ʼ\5/g" \
    -e "s/(y|Y)'(all|ALL)'(d|D)'(nt|NT)'(ve|VE)/\1ʼ\2ʼ\3ʼ\4ʼ\5/g" \
    -e "s/(y|Y)'(all|ALL)'(d|D)'(ve|VE)/\1ʼ\2ʼ\3ʼ\4/g" \
    -e "s/(y|Y)'(all|ALL)'(d|D|ll|LL|re|RE|ve|VE)/\1ʼ\2ʼ\3/g" \
    -e "s/(y|Y)'(all|ALL)/\1ʼ\2/g" \
    -e "s/(y|Y)'(ain|AIN)'(t|T)/\1ʼ\2ʼ\3/g" \
    -e "s/(wouldn|Wouldn|WOULDN)'(t|T)'(ve|VE)/\1ʼ\2ʼ\3/g" \
    -e "s/(wouldn|Wouldn|WOULDN)'(t|T)/\1ʼ\2/g" \
    -e "s/(won|Won|WON)'(t|T)'(ve|VE)/\1ʼ\2ʼ\3/g" \
    -e "s/(won|Won|WON)'(t|T)/\1ʼ\2/g" \
    -e "s/(who|Who|WHO)'(d|D|ll|LL)'(ve|VE)/\1ʼ\2ʼ\3/g" \
    -e "s/(who|Who|WHO)'(d|D|ll|LL|re|RE|s|S|ve|VE)/\1ʼ\2/g" \
    -e "s/(where|Where|WHERE)'(s|S)/\1ʼ\2/g" \
    -e "s/(what|What|WHAT)'(ll|LL|re|RE|s|S|ve|VE)/\1ʼ\2/g" \
    -e "s/(weren|Weren|WEREN)'(t|T)/\1ʼ\2/g" \
    -e "s/(we|We|WE)'(ven|VEN)'(t|T)/\1ʼ\2ʼ\3/g" \
    -e "s/(we|We|WE)'(ren|REN)'(t|T)/\1ʼ\2ʼ\3/g" \
    -e "s/(we|We|WE)'(d|D|ll|LL)'(ve|VE)/\1ʼ\2ʼ\3/g" \
    -e "s/(we|We|WE)'(d|D|ll|LL|re|RE|ve|VE)/\1ʼ\2/g" \
    -e "s/(they|They|THEY)'(d|D|ll|LL)'(ve|VE)/\1ʼ\2ʼ\3/g" \
    -e "s/(they|They|THEY)'(d|D|ll|LL|re|RE|ve|VE)/\1ʼ\2/g" \
    -e "s/(there|There|THERE)'(ll|LL)'(ve|VE)/\1ʼ\2ʼ\3/g" \
    -e "s/(there|There|THERE)'(ll|LL|s|S|ve|VE)/\1ʼ\2/g" \
    -e "s/(that|That|THAT)'(d|D|ll|LL)'(ve|VE)/\1ʼ\2ʼ\3/g" \
    -e "s/(that|That|THAT)'(d|D|ll|LL|s|S)/\1ʼ\2/g" \
    -e "s/(shouldn|Shouldn|SHOULDN)'(t|T)'(ve|VE)/\1ʼ\2ʼ\3/g" \
    -e "s/(shouldn|Shouldn|SHOULDN)'(t|T)/\1ʼ\2/g" \
    -e "s/(she|She|SHE)'(d|D|ll|LL)'(ve|VE)/\1ʼ\2ʼ\3/g" \
    -e "s/(she|She|SHE)'(d|D|ll|LL|s|S)/\1ʼ\2/g" \
    -e "s/(shan|Shan|SHAN)'(t|T)/\1ʼ\2/g" \
    -e "s/(oughtn|Oughtn|OUGHTN)'(t|T)'(ve|VE)/\1ʼ\2ʼ\3/g" \
    -e "s/(oughtn|Oughtn|OUGHTN)'(t|T)/\1ʼ\2/g" \
    -e "s/(mustn|Mustn|MUSTN)'(t|T)'(ve|VE)/\1ʼ\2ʼ\3/g" \
    -e "s/(mustn|Mustn|MUSTN)'(t|T)/\1ʼ\2/g" \
    -e "s/(mightn|Mightn|MIGHTN)'(t|T)'(ve|VE)/\1ʼ\2ʼ\3/g" \
    -e "s/(mightn|Mightn|MIGHTN)'(t|T)/\1ʼ\2/g" \
    -e "s/(might|Might|MIGHT)'(ve|VE)/\1ʼ\2/g" \
    -e "s/(let|Let|LET)'(s|S)/\1ʼ\2/g" \
    -e "s/(it|It|IT)'(d|D|ll|LL)'(ve|VE)/\1ʼ\2ʼ\3/g" \
    -e "s/(it|It|IT)'(d|D|ll|LL|s|S)/\1ʼ\2/g" \
    -e "s/(isn|Isn|ISN)'(t|T)/\1ʼ\2/g" \
    -e "s/(I|i)'(dn|DN)'(t|T)'(ve|VE)/\1ʼ\2ʼ\3ʼ\4/g" \
    -e "s/(I|i)'(d|D|ll|LL)'(ve|VE)/\1ʼ\2ʼ\3/g" \
    -e "s/(I|i)'(d|D|ll|LL|m|M|ve|VE)/\1ʼ\2/g" \
    -e "s/(he|He|HE)'(d|D|ll|LL)'(ve|VE)/\1ʼ\2ʼ\3/g" \
    -e "s/(he|He|HE)'(d|D|ll|LL|s|S)/\1ʼ\2/g" \
    -e "s/(haven|Haven|HAVEN)'(t|T)/\1ʼ\2/g" \
    -e "s/(hasn|Hasn|HASN)'(t|T)/\1ʼ\2/g" \
    -e "s/(hadn|Hadn|HADN)'(t|T)'(ve|VE)/\1ʼ\2ʼ\3/g" \
    -e "s/(hadn|Hadn|HADN)'(t|T)/\1ʼ\2/g" \
    -e "s/(d|D)'(ya|YA|you|YOU)/\1ʼ\2/g" \
    -e "s/(don|Don|DON)'(t|T)/\1ʼ\2/g" \
    -e "s/(doesn|Doesn|DOESN)'(t|T)/\1ʼ\2/g" \
    -e "s/(didn|Didn|DIDN)'(t|T)/\1ʼ\2/g" \
    -e "s/(could|Could|COULD)'(ve|VE)/\1ʼ\2/g" \
    -e "s/(couldn|Couldn|COULDN)'(t|T)'(ve|VE)/\1ʼ\2ʼ\3/g" \
    -e "s/(couldn|Couldn|COULDN)'(t|T)/\1ʼ\2/g" \
    -e "s/(can|Can|CAN)'(t|T)'(ve|VE)/\1ʼ\2ʼ\3/g" \
    -e "s/(can|Can|CAN)'(t|T)/\1ʼ\2/g" \
    -e "s/(aren|Aren|AREN)'(t|T)/\1ʼ\2/g" \
    -e "s/(ate|Ate|ATE)'(nt|NT)/\1ʼ\2/g" \
    -e "s/(ain|Ain|AIN)'(t|T)/\1ʼ\2/g" \
    "${src[@]}"
} # replace ' with ʼ in contractions
-
-# Author: Steven Baltakatei Sandoval
-# License: GPLv3+