#!/bin/bash
# Origin: unitproc/bkt-replace_contractions
#   Commit 265d3d9a134fcb78054560278b5d9b6d1f6396bd
#   Author: Steven Baltakatei Sandoval, 2024-01-25
#   Subject: feat(unitproc/bkt-replace_contractions):Add Bash function

# Function to replace contractions
replace_contractions() {
    # Desc: Replace ' with ʼ in contractions
    # Note: In contractions of UTF-8 text, replaces U+0027 APOSTROPHE
    #       with U+02BC MODIFIER LETTER APOSTROPHE. Only the apostrophe
    #       is changed; the surrounding letters keep the case they had
    #       in the input (e.g. "You're" -> "Youʼre", "DON'T" -> "DONʼT").
    # Input: stdin          (no argument, empty argument, or "-")
    #        arg1 file path (any readable non-directory path)
    # Output: stdout  transformed text
    #         stderr  diagnostics
    # Return: 1 on wrong argument count or unreadable path;
    #         otherwise the exit status of sed
    # Version: 0.0.2
    # Depends: GNU sed 4.8 (uses the case-insensitive `I` flag of `s`)

    local src="";       # file to read; empty means standard input
    local -a sed_script; # list of `-e EXPR` arguments handed to sed

    # Check input
    if [[ "$#" -gt 1 ]]; then
        printf 'FATAL:Incorrect argument count:%s\n' "$#" 1>&2;
        return 1;
    fi;

    if [[ "$#" -eq 1 && -n "$1" && "$1" != "-" ]]; then
        # A path was explicitly requested: refuse to fall back to stdin
        # silently, since that hides typos and can block on a terminal.
        if [[ -d "$1" || ! -r "$1" ]]; then
            printf 'FATAL:Not a readable file:%s\n' "$1" 1>&2;
            return 1;
        fi;
        src="$1";
    fi;

    # Build substitutions, grouped by contraction suffix. Each rule
    # captures the letters on both sides of the apostrophe and writes
    # them back via \1 and \2, so matching is case-insensitive (I flag)
    # without altering the capitalisation of the matched text.
    sed_script=(
        -e "s/(you|they|we)'(re)/\1ʼ\2/gI"
        -e "s/(i)'(m)/\1ʼ\2/gI"
        -e "s/(you|they|we|i)'(ve)/\1ʼ\2/gI"
        -e "s/(that|what|here|there|where|who|how)'(s)/\1ʼ\2/gI"
        -e "s/(doesn|don)'(t)/\1ʼ\2/gI"
        -e "s/(i|we|they)'(ll)/\1ʼ\2/gI"
    );

    # Perform substitutions
    if [[ -n "$src" ]]; then
        # `--` keeps a path beginning with "-" from being parsed as an option
        sed -E "${sed_script[@]}" -- "$src";
    else
        sed -E "${sed_script[@]}";
    fi;
}; # replace ' with ʼ in contractions