From eb1af746ce43c95e2a97b161e2f0c5a2d9b85b71 Mon Sep 17 00:00:00 2001 From: Steven Baltakatei Sandoval Date: Sat, 30 Sep 2023 20:28:00 +0000 Subject: [PATCH] feat(bktess):Add bash wrapper for tesseract img OCR --- user/bktess | 207 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 207 insertions(+) create mode 100755 user/bktess diff --git a/user/bktess b/user/bktess new file mode 100755 index 0000000..03a8b2d --- /dev/null +++ b/user/bktess @@ -0,0 +1,207 @@ +#!/bin/bash +#Desc: A custom wrapper for running tesseract recursively on a directory +# Version: 0.0.1 +# Depends: Debian: tesseract 4.1.1 +# Depends: BK-2020-03: checkapp() 0.1.1, checkfile() 0.1.2, checkdir() 0.1.2, displayMissing() 1.0.0 + +declare -Ag appRollCall # Associative array for storing app status +declare -Ag fileRollCall # Associative array for storing file status +declare -Ag dirRollCall # Associative array for storing dir status + + +yell() { echo "$0: $*" >&2; } # print script path and all args to stderr +die() { yell "$*"; exit 111; } # same as yell() but non-zero exit status +must() { "$@" || die "cannot $*"; } # runs args as command, reports args if command fails +checkapp() { + # Desc: If arg is a command, save result in assoc array 'appRollCall' + # Usage: checkapp arg1 arg2 arg3 ... + # Version: 0.1.1 + # Input: global assoc. array 'appRollCall' + # Output: adds/updates key(value) to global assoc array 'appRollCall' + # Depends: bash 5.0.3 + local returnState + + #===Process Args=== + for arg in "$@"; do + if command -v "$arg" 1>/dev/null 2>&1; then # Check if arg is a valid command + appRollCall[$arg]="true"; + if ! [ "$returnState" = "false" ]; then returnState="true"; fi; + else + appRollCall[$arg]="false"; returnState="false"; + fi; + done; + + #===Determine function return code=== + if [ "$returnState" = "true" ]; then + return 0; + else + return 1; + fi; +} # Check that app exists +checkfile() { + # Desc: If arg is a file path, save result in assoc array 'fileRollCall' + # Usage: checkfile arg1 arg2 arg3 ... + # Version: 0.1.2 + # Input: global assoc. array 'fileRollCall' + # Output: adds/updates key(value) to global assoc array 'fileRollCall'; + # Output: returns 0 if app found, 1 otherwise + # Depends: bash 5.0.3 + local returnState + + #===Process Args=== + for arg in "$@"; do + if [ -f "$arg" ]; then + fileRollCall["$arg"]="true"; + if ! [ "$returnState" = "false" ]; then returnState="true"; fi; + elif [ -z "$arg" ]; then + fileRollCall["(no name)"]="false"; returnState="false"; + else + fileRollCall["$arg"]="false"; returnState="false"; + fi; + done; + + #===Determine function return code=== + if [ "$returnState" = "true" ]; then + return 0; + else + return 1; + fi; +} # Check that file exists +checkdir() { + # Desc: If arg is a dir path, save result in assoc array 'dirRollCall' + # Usage: checkdir arg1 arg2 arg3 ... + # Version 0.1.2 + # Input: global assoc. array 'dirRollCall' + # Output: adds/updates key(value) to global assoc array 'dirRollCall'; + # Output: returns 0 if all args are dirs; 1 otherwise + # Depends: Bash 5.0.3 + local returnState + + #===Process Args=== + for arg in "$@"; do + if [ -z "$arg" ]; then + dirRollCall["(Unspecified Dirname(s))"]="false"; returnState="false"; + elif [ -d "$arg" ]; then + dirRollCall["$arg"]="true"; + if ! [ "$returnState" = "false" ]; then returnState="true"; fi + else + dirRollCall["$arg"]="false"; returnState="false"; + fi + done + + #===Determine function return code=== + if [ "$returnState" = "true" ]; then + return 0; + else + return 1; + fi +} # Check that dir exists +displayMissing() { + # Desc: Displays missing apps, files, and dirs + # Usage: displayMissing + # Version 1.0.0 + # Input: associative arrays: appRollCall, fileRollCall, dirRollCall + # Output: stderr: messages indicating missing apps, file, or dirs + # Output: returns exit code 0 if nothing missing; 1 otherwise + # Depends: bash 5, checkAppFileDir() + local missingApps value appMissing missingFiles fileMissing + local missingDirs dirMissing + + #==BEGIN Display errors== + #===BEGIN Display Missing Apps=== + missingApps="Missing apps :"; + #for key in "${!appRollCall[@]}"; do echo "DEBUG:$key => ${appRollCall[$key]}"; done + for key in "${!appRollCall[@]}"; do + value="${appRollCall[$key]}"; + if [ "$value" = "false" ]; then + #echo "DEBUG:Missing apps: $key => $value"; + missingApps="$missingApps""$key "; + appMissing="true"; + fi; + done; + if [ "$appMissing" = "true" ]; then # Only indicate if an app is missing. + echo "$missingApps" 1>&2; + fi; + unset value; + #===END Display Missing Apps=== + + #===BEGIN Display Missing Files=== + missingFiles="Missing files:"; + #for key in "${!fileRollCall[@]}"; do echo "DEBUG:$key => ${fileRollCall[$key]}"; done + for key in "${!fileRollCall[@]}"; do + value="${fileRollCall[$key]}"; + if [ "$value" = "false" ]; then + #echo "DEBUG:Missing files: $key => $value"; + missingFiles="$missingFiles""$key "; + fileMissing="true"; + fi; + done; + if [ "$fileMissing" = "true" ]; then # Only indicate if an app is missing. + echo "$missingFiles" 1>&2; + fi; + unset value; + #===END Display Missing Files=== + + #===BEGIN Display Missing Directories=== + missingDirs="Missing dirs:"; + #for key in "${!dirRollCall[@]}"; do echo "DEBUG:$key => ${dirRollCall[$key]}"; done + for key in "${!dirRollCall[@]}"; do + value="${dirRollCall[$key]}"; + if [ "$value" = "false" ]; then + #echo "DEBUG:Missing dirs: $key => $value"; + missingDirs="$missingDirs""$key "; + dirMissing="true"; + fi; + done; + if [ "$dirMissing" = "true" ]; then # Only indicate if an dir is missing. + echo "$missingDirs" 1>&2; + fi; + unset value; + #===END Display Missing Directories=== + + #==END Display errors== + #==BEGIN Determine function return code=== + if [ "$appMissing" == "true" ] || [ "$fileMissing" == "true" ] || [ "$dirMissing" == "true" ]; then + return 1; + else + return 0; + fi + #==END Determine function return code=== +} # Display missing apps, files, dirs +checkPlumbing() { + if ! checkapp tesseract parallel grep shuf find; then + displayMissing; die "FATAL: Missing apps."; fi; + if [[ ! -d "$1" ]]; then die "FATAL:Not a dir:$1"; fi; +}; # Check dependencies and expected inputs +runTesseract() { + # Depends: Debian: tesseract 4.1.1 + # Input: arg1: file path + + # Check input file path + if [[ ! -f "$1" ]]; then yell "File not found:$1"; return 1; fi; + + # Print file path + printf "File path:%s\n" "$1"; + + # Run tesseract + if ! tesseract "$1" stdout 2>/dev/random; then yell "STATUS:Tesseract exited with error."; fi; +}; # Run tesseract +main() { + declare -a array_files; # array for storing files to scan + + # Inputs: arg1: dir + checkPlumbing "$@"; + + # Assemble file list + pat_exclusions=".ots$"; + while read -r line; do + array_files+=("$line"); + done < <(find "$1" -type f | grep -Eiv "$pat_exclusions" | shuf); + + # Run tesseract + parallel --jobs="100%" runTesseract '{}' ::: "${array_files[@]}"; +}; # main program + +export -f yell die must runTesseract; + +main "$@"; -- 2.30.2