rename(unitproc/bkt-replace_apostrophes):Was replace_contractions
author Steven Baltakatei Sandoval <baltakatei@gmail.com>
Thu, 25 Jan 2024 23:36:03 +0000 (23:36 +0000)
committer Steven Baltakatei Sandoval <baltakatei@gmail.com>
Thu, 25 Jan 2024 23:36:03 +0000 (23:36 +0000)
- Note: Renamed the function so that it applies more generally to
apostrophe replacement, not only in contractions but also in possessive cases.

unitproc/bkt-replace_apostrophes [moved from unitproc/bkt-replace_contractions with 91% similarity]

similarity index 91%
rename from unitproc/bkt-replace_contractions
rename to unitproc/bkt-replace_apostrophes
index e03e65987a844db3dddfdc2dd085358a3ff3634f..f045c1b410fa8db0d87ab34264ddaf00e7ff6ea7 100644 (file)
@@ -1,14 +1,14 @@
 #!/bin/bash
 
 # Function to replace contractions
-replace_contractions() {
+replace_apostrophes() {
     # Desc: Replace ' with ʼ in contractions
     # Note: In contractions of UTF-8 text file, replaces U+0027
     #   APOSTROPHE with U+02BC MODIFIER LETTER APOSTROPHE
     # Input: stdin  
     #        arg1   file path
     # Output: stdout
-    # Version: 0.1.1 (BK-2020-03)
+    # Version: 1.0.0 (BK-2020-03)
     # Depends: GNU sed 4.8
 
     # Check input
@@ -45,13 +45,14 @@ replace_contractions() {
         -e "s/(won|Won|WON)'(t|T)/\1ʼ\2/g" \
         -e "s/(who|Who|WHO)'(d|D|ll|LL)'(ve|VE)/\1ʼ\2ʼ\3/g" \
         -e "s/(who|Who|WHO)'(d|D|ll|LL|re|RE|s|S|ve|VE)/\1ʼ\2/g" \
-        -e "s/(where|Where|WHERE)'(s|S)/\1ʼ\2/g" \
+        -e "s/(where|Where|WHERE)'(d|D|s|S)/\1ʼ\2/g" \
         -e "s/(what|What|WHAT)'(ll|LL|re|RE|s|S|ve|VE)/\1ʼ\2/g" \
         -e "s/(weren|Weren|WEREN)'(t|T)/\1ʼ\2/g" \
         -e "s/(we|We|WE)'(ven|VEN)'(t|T)/\1ʼ\2ʼ\3/g" \
         -e "s/(we|We|WE)'(ren|REN)'(t|T)/\1ʼ\2ʼ\3/g" \
         -e "s/(we|We|WE)'(d|D|ll|LL)'(ve|VE)/\1ʼ\2ʼ\3/g" \
         -e "s/(we|We|WE)'(d|D|ll|LL|re|RE|ve|VE)/\1ʼ\2/g" \
+        -e "s/(wasn|Wasn|WASN)'(t|T)/\1ʼ\2/g" \
         -e "s/(they|They|THEY)'(d|D|ll|LL)'(ve|VE)/\1ʼ\2ʼ\3/g" \
         -e "s/(they|They|THEY)'(d|D|ll|LL|re|RE|ve|VE)/\1ʼ\2/g" \
         -e "s/(there|There|THERE)'(ll|LL)'(ve|VE)/\1ʼ\2ʼ\3/g" \
@@ -63,8 +64,10 @@ replace_contractions() {
         -e "s/(she|She|SHE)'(d|D|ll|LL)'(ve|VE)/\1ʼ\2ʼ\3/g" \
         -e "s/(she|She|SHE)'(d|D|ll|LL|s|S)/\1ʼ\2/g" \
         -e "s/(shan|Shan|SHAN)'(t|T)/\1ʼ\2/g" \
+        -e "s/'(s\b)/ʼ\1/g" \
         -e "s/(oughtn|Oughtn|OUGHTN)'(t|T)'(ve|VE)/\1ʼ\2ʼ\3/g" \
         -e "s/(oughtn|Oughtn|OUGHTN)'(t|T)/\1ʼ\2/g" \
+        -e "s/(o|O)'(clock|CLOCK)/\1ʼ\2/g" \
         -e "s/(mustn|Mustn|MUSTN)'(t|T)/\1ʼ\2/g" \
         -e "s/(mustn|Mustn|MUSTN)'(t|T)'(ve|VE)/\1ʼ\2ʼ\3/g" \
         -e "s/(mustn|Mustn|MUSTN)'(t|T)/\1ʼ\2/g" \
@@ -78,12 +81,14 @@ replace_contractions() {
         -e "s/(I|i)'(dn|DN)'(t|T)'(ve|VE)/\1ʼ\2ʼ\3ʼ\4/g" \
         -e "s/(I|i)'(d|D|ll|LL)'(ve|VE)/\1ʼ\2ʼ\3/g" \
         -e "s/(I|i)'(d|D|ll|LL|m|M|ve|VE)/\1ʼ\2/g" \
+        -e "s/(how|How|HOW)'(d|D)/\1ʼ\2/g" \
         -e "s/(he|He|HE)'(d|D|ll|LL)'(ve|VE)/\1ʼ\2ʼ\3/g" \
         -e "s/(he|He|HE)'(d|D|ll|LL|s|S)/\1ʼ\2/g" \
         -e "s/(haven|Haven|HAVEN)'(t|T)/\1ʼ\2/g" \
         -e "s/(hasn|Hasn|HASN)'(t|T)/\1ʼ\2/g" \
         -e "s/(hadn|Hadn|HADN)'(t|T)'(ve|VE)/\1ʼ\2ʼ\3/g" \
         -e "s/(hadn|Hadn|HADN)'(t|T)/\1ʼ\2/g" \
+        -e "s/'(em\b)/ʼ\1/g" \
         -e "s/(d|D)'(ya|YA|you|YOU)/\1ʼ\2/g" \
         -e "s/(don|Don|DON)'(t|T)/\1ʼ\2/g" \
         -e "s/(doesn|Doesn|DOESN)'(t|T)/\1ʼ\2/g" \
@@ -91,8 +96,11 @@ replace_contractions() {
         -e "s/(could|Could|COULD)'(ve|VE)/\1ʼ\2/g" \
         -e "s/(couldn|Couldn|COULDN)'(t|T)'(ve|VE)/\1ʼ\2ʼ\3/g" \
         -e "s/(couldn|Couldn|COULDN)'(t|T)/\1ʼ\2/g" \
+        -e "s/(c|C)'(mere|MERE)/\1ʼ\2/g" \
         -e "s/(can|Can|CAN)'(t|T)'(ve|VE)/\1ʼ\2ʼ\3/g" \
         -e "s/(can|Can|CAN)'(t|T)/\1ʼ\2/g" \
+        -e "s/'(cause|Cause|CAUSE)/ʼ\1/g" \
+        -e "s/'(bout|Bout|BOUT)/ʼ\1/g" \
         -e "s/(aren|Aren|AREN)'(t|T)/\1ʼ\2/g" \
         -e "s/(ate|Ate|ATE)'(nt|NT)/\1ʼ\2/g" \
         -e "s/(ain|Ain|AIN)'(t|T)/\1ʼ\2/g" \