X-Git-Url: https://zdv2.bktei.com/gitweb/BK-2020-03.git/blobdiff_plain/e424b71045e5259dcdb9f28e70864a7f96499ef9..c21cf1161003f00b20da69cd9054220cea9268eb:/unitproc/bkt-replace_apos diff --git a/unitproc/bkt-replace_apos b/unitproc/bkt-replace_apos index 1ba50fc..1e0cfd3 100644 --- a/unitproc/bkt-replace_apos +++ b/unitproc/bkt-replace_apos @@ -1,14 +1,13 @@ #!/bin/bash -# Function to replace contractions -function replace_apostrophes() { - # Desc: Replace ' with ʼ in contractions - # Note: In contractions of UTF-8 text file, replaces U+0027 +function replace_apos() { + # Desc: Replace ' with ʼ in text + # Note: In UTF-8 text file, replaces U+0027 # APOSTROPHE with U+02BC MODIFIER LETTER APOSTROPHE # Input: stdin # arg1 file path # Output: stdout - # Version: 1.1.0 (BK-2020-03) + # Version: 1.2.0 (BK-2020-03) # Depends: GNU sed 4.8 # Check input @@ -65,7 +64,7 @@ function replace_apostrophes() { -e "s/(she|She|SHE)'(d|D|ll|LL|s|S)/\1ʼ\2/g" \ -e "s/(shan|Shan|SHAN)'(t|T)/\1ʼ\2/g" \ -e "s/'(s|S)\b/ʼ\1/g" \ - -e "s/(s|S)'( |$)/\1ʼ\2/g" \ + -e "s/(s|S)'( |,|\.|$)/\1ʼ\2/g" \ -e "s/(oughtn|Oughtn|OUGHTN)'(t|T)'(ve|VE)/\1ʼ\2ʼ\3/g" \ -e "s/(oughtn|Oughtn|OUGHTN)'(t|T)/\1ʼ\2/g" \ -e "s/(o|O)'(clock|CLOCK)/\1ʼ\2/g" \ @@ -91,6 +90,7 @@ function replace_apostrophes() { -e "s/(hadn|Hadn|HADN)'(t|T)/\1ʼ\2/g" \ -e "s/'(em\b)/ʼ\1/g" \ -e "s/(d|D)'(ya|YA|you|YOU)/\1ʼ\2/g" \ + -e "s/([[:alnum:]])'(d|D)/\1ʼ\2/g" \ -e "s/(don|Don|DON)'(t|T)/\1ʼ\2/g" \ -e "s/(doesn|Doesn|DOESN)'(t|T)/\1ʼ\2/g" \ -e "s/(didn|Didn|DIDN)'(t|T)/\1ʼ\2/g" \ @@ -105,8 +105,9 @@ function replace_apostrophes() { -e "s/(aren|Aren|AREN)'(t|T)/\1ʼ\2/g" \ -e "s/(ate|Ate|ATE)'(nt|NT)/\1ʼ\2/g" \ -e "s/(ain|Ain|AIN)'(t|T)/\1ʼ\2/g" \ + -e "s/([[:alpha:]])'([[:alpha:]])/\1ʼ\2/g" \ "$input" -}; # replace ' with ʼ in contractions +}; # replace ' with ʼ # Author: Steven Baltakatei Sandoval # License: GPLv3+