feat(unitproc/bkt-replace_contractions):Add Bash function
author Steven Baltakatei Sandoval <baltakatei@gmail.com>
Thu, 25 Jan 2024 20:56:58 +0000 (20:56 +0000)
committer Steven Baltakatei Sandoval <baltakatei@gmail.com>
Thu, 25 Jan 2024 20:56:58 +0000 (20:56 +0000)
unitproc/bkt-replace_contractions [new file with mode: 0644]

diff --git a/unitproc/bkt-replace_contractions b/unitproc/bkt-replace_contractions
new file mode 100644 (file)
index 0000000..fe1d5d6
--- /dev/null
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+# Function to replace contractions
+replace_contractions() {
+    # Desc: Replace ' with ʼ in contractions
+    # Note: In contractions of UTF-8 text file, replaces U+0027
+    #   APOSTROPHE with U+02BC MODIFIER LETTER APOSTROPHE
+    # Input: stdin  
+    #        arg1   file path
+    # Output: stdout
+    # Version: 0.0.1
+    # Depends: GNU sed 4.8
+
+    # Check input
+    if [[ "$#" -gt 1 ]]; then
+        echo "FATAL:Incorrect argument count:$#" 1>&2;
+        return 1;
+    fi;
+
+    if [[ -f "$1" ]]; then
+        # Use specified file
+        input="$1";
+    else
+        # Use standard input
+        input="-";
+    fi;
+
+    # Perform substitutions
+    sed -E \
+        -e "s/(you're|You're|YOU'RE)/youʼre/gI" \
+        -e "s/(i'm|I'm|I'M)/Iʼm/gI" \
+        -e "s/(you've|You've|YOU'VE)/youʼve/gI" \
+        -e "s/(they're|They're|THEY'RE)/theyʼre/gI" \
+        -e "s/(we're|We're|WE'RE)/weʼre/gI" \
+        -e "s/(they've|They've|THEY'VE)/theyʼve/gI" \
+        -e "s/(we've|We've|WE'VE)/weʼve/gI" \
+        -e "s/(i've|I've|I'VE)/Iʼve/gI" \
+        -e "s/(that's|That's|THAT'S)/thatʼs/gI" \
+        -e "s/(what's|What's|WHAT'S)/whatʼs/gI" \
+        -e "s/(here's|Here's|HERE'S)/hereʼs/gI" \
+        -e "s/(there's|There's|THERE'S)/thereʼs/gI" \
+        -e "s/(where's|Where's|WHERE'S)/whereʼs/gI" \
+        -e "s/(who's|Who's|WHO'S)/whoʼs/gI" \
+        -e "s/(how's|How's|HOW'S)/howʼs/gI" \
+        -e "s/(doesn't|Doesn't|DOESN'T)/doesnʼt/gI" \
+        -e "s/(don't|Don't|DON'T)/donʼt/gI" \
+        -e "s/(i'll|I'll|I'LL)/Iʼll/gI" \
+        -e "s/(we'll|We'll|WE'LL)/weʼll/gI" \
+        -e "s/(they'll|They'll|THEY'LL)/theyʼll/gI" \
+        "$input";
+}; # replace ' with ʼ in contractions