#!/bin/bash
# Desc: A custom wrapper for running tesseract recursively on a directory
# Version: 0.0.1
# Depends: Debian: tesseract 4.1.1
# Depends: parallel, grep, shuf, find
# Depends: BK-2020-03: checkapp() 0.1.1, checkfile() 0.1.2, checkdir() 0.1.2, displayMissing() 1.0.0
+
# Global roll-call tables. checkapp(), checkfile() and checkdir() write to
# them and displayMissing() reads them back. Each maps a checked name to the
# string "true" (found) or "false" (missing).
declare -Ag appRollCall  # Associative array for storing app status
declare -Ag fileRollCall # Associative array for storing file status
declare -Ag dirRollCall  # Associative array for storing dir status
+
+
yell() {
  # Desc: Print script path ($0) and all args, joined into one line, to stderr
  # Usage: yell word1 word2 ...
  # Output: stderr: "<$0>: <args>"
  # printf is used so the message text is emitted verbatim
  printf '%s\n' "$0: $*" >&2
} # print script path and all args to stderr
die() {
  # Desc: Report all args via yell(), then terminate the shell
  # Usage: die word1 word2 ...
  # Output: exits with status 111
  # Depends: yell()
  yell "$*"
  exit 111
} # same as yell() but non-zero exit status
must() {
  # Desc: Run args as a command; if it fails, abort via die()
  # Usage: must cmd arg1 arg2 ...
  # Output: on failure, die() is called with "cannot <cmd and args>"
  # Depends: die()
  "$@" || die "cannot $*"
} # runs args as command, reports args if command fails
checkapp() {
  # Desc: If arg is a command, save result in assoc array 'appRollCall'
  # Usage: checkapp arg1 arg2 arg3 ...
  # Version: 0.1.1
  # Input: global assoc. array 'appRollCall'
  # Output: adds/updates key(value) to global assoc array 'appRollCall';
  #         an empty arg is recorded as missing under the key "(no name)"
  # Output: returns 0 if every arg is a command; 1 otherwise (also if no args)
  # Depends: bash 5.0.3
  local arg returnState

  #===Process Args===
  for arg in "$@"; do
    if [[ -z "$arg" ]]; then
      # An empty string is not a valid associative-array subscript.
      appRollCall["(no name)"]="false"
      returnState="false"
    elif command -v -- "$arg" >/dev/null 2>&1; then # Check if arg is a valid command
      appRollCall["$arg"]="true"
      # A single earlier failure keeps the overall state "false".
      if [[ "$returnState" != "false" ]]; then returnState="true"; fi
    else
      appRollCall["$arg"]="false"
      returnState="false"
    fi
  done

  #===Determine function return code===
  [[ "$returnState" == "true" ]]
} # Check that app exists
checkfile() {
  # Desc: If arg is a file path, save result in assoc array 'fileRollCall'
  # Usage: checkfile arg1 arg2 arg3 ...
  # Version: 0.1.2
  # Input: global assoc. array 'fileRollCall'
  # Output: adds/updates key(value) to global assoc array 'fileRollCall';
  #         an empty arg is recorded as missing under the key "(no name)"
  # Output: returns 0 if every arg is a regular file; 1 otherwise (also if no args)
  # Depends: bash 5.0.3
  local arg returnState

  #===Process Args===
  for arg in "$@"; do
    if [[ -z "$arg" ]]; then
      # An empty string is not a valid associative-array subscript.
      fileRollCall["(no name)"]="false"
      returnState="false"
    elif [[ -f "$arg" ]]; then
      fileRollCall["$arg"]="true"
      # A single earlier failure keeps the overall state "false".
      if [[ "$returnState" != "false" ]]; then returnState="true"; fi
    else
      fileRollCall["$arg"]="false"
      returnState="false"
    fi
  done

  #===Determine function return code===
  [[ "$returnState" == "true" ]]
} # Check that file exists
checkdir() {
  # Desc: If arg is a dir path, save result in assoc array 'dirRollCall'
  # Usage: checkdir arg1 arg2 arg3 ...
  # Version 0.1.2
  # Input: global assoc. array 'dirRollCall'
  # Output: adds/updates key(value) to global assoc array 'dirRollCall';
  #         an empty arg is recorded as missing under the key
  #         "(Unspecified Dirname(s))"
  # Output: returns 0 if all args are dirs; 1 otherwise (also if no args)
  # Depends: Bash 5.0.3
  local arg returnState

  #===Process Args===
  for arg in "$@"; do
    if [[ -z "$arg" ]]; then
      # An empty string is not a valid associative-array subscript.
      dirRollCall["(Unspecified Dirname(s))"]="false"
      returnState="false"
    elif [[ -d "$arg" ]]; then
      dirRollCall["$arg"]="true"
      # A single earlier failure keeps the overall state "false".
      if [[ "$returnState" != "false" ]]; then returnState="true"; fi
    else
      dirRollCall["$arg"]="false"
      returnState="false"
    fi
  done

  #===Determine function return code===
  [[ "$returnState" == "true" ]]
} # Check that dir exists
_displayMissingFrom() {
  # Desc: Print the keys of one roll-call array whose value is "false"
  # Usage: _displayMissingFrom label arrayName
  # Input: arg1: text printed in front of the list of keys
  # Input: arg2: name of a global associative array
  # Output: stderr: "<label><key> <key> ..." (only if something is missing)
  # Output: returns 1 if at least one key is marked "false"; 0 otherwise
  local label="$1"
  local -n rollCall="$2" # nameref: lets one loop serve all three tables
  local key missing=""

  for key in "${!rollCall[@]}"; do
    if [[ "${rollCall[$key]}" == "false" ]]; then
      missing+="$key "
    fi
  done

  if [[ -z "$missing" ]]; then return 0; fi
  printf '%s\n' "${label}${missing}" 1>&2
  return 1
} # Helper for displayMissing()
displayMissing() {
  # Desc: Displays missing apps, files, and dirs
  # Usage: displayMissing
  # Version 1.0.0
  # Input: associative arrays: appRollCall, fileRollCall, dirRollCall
  # Output: stderr: messages indicating missing apps, file, or dirs
  # Output: returns exit code 0 if nothing missing; 1 otherwise
  # Depends: bash 5, checkAppFileDir()
  local anyMissing="false"

  # Every table is reported, even after an earlier one has missing entries.
  _displayMissingFrom "Missing apps :" appRollCall || anyMissing="true"
  _displayMissingFrom "Missing files:" fileRollCall || anyMissing="true"
  _displayMissingFrom "Missing dirs:" dirRollCall || anyMissing="true"

  #==Determine function return code===
  [[ "$anyMissing" == "false" ]]
} # Display missing apps, files, dirs
checkPlumbing() {
  # Desc: Check dependencies and expected inputs; abort via die() on failure
  # Usage: checkPlumbing dir
  # Input: arg1: path that must be an existing directory
  # Output: stderr: list of missing apps (via displayMissing) and/or a
  #         FATAL message (via die)
  # Output: returns 0 if every check passes
  # Depends: checkapp(), displayMissing(), die()
  if ! checkapp tesseract parallel grep shuf find; then
    displayMissing
    die "FATAL: Missing apps."
  fi
  # Report a missing argument explicitly instead of "Not a dir:" with no path.
  if [[ -z "$1" ]]; then
    die "FATAL:No dir specified."
  fi
  if [[ ! -d "$1" ]]; then
    die "FATAL:Not a dir:$1"
  fi
} # Check dependencies and expected inputs
runTesseract() {
  # Desc: Print a file's path, then run tesseract on it with output to stdout
  # Usage: runTesseract path
  # Input: arg1: file path
  # Output: stdout: "File path:<path>" followed by tesseract's output
  # Output: stderr: message if the file is missing or tesseract fails
  # Output: returns 1 if arg1 is not a regular file; 0 otherwise
  # Depends: Debian: tesseract 4.1.1; yell()
  local path="$1"

  # Check input file path
  if [[ ! -f "$path" ]]; then
    yell "File not found:$path"
    return 1
  fi

  # Print file path
  printf "File path:%s\n" "$path"

  # Run tesseract; its own diagnostics are discarded (/dev/null, not
  # /dev/random) and a failure is only reported, so the batch continues.
  if ! tesseract "$path" stdout 2>/dev/null; then
    yell "STATUS:Tesseract exited with error."
  fi
  return 0
} # Run tesseract
main() {
  # Desc: Run runTesseract, in parallel and in random order, on every regular
  #       file below a directory, skipping names ending in ".ots"
  # Usage: main dir
  # Input: arg1: dir
  # Output: stdout/stderr of the runTesseract jobs
  # Output: returns 0 if there is nothing to scan; otherwise the exit status
  #         of parallel
  # Depends: checkPlumbing(), runTesseract(), yell(); find, grep -z, shuf -z,
  #          parallel; bash 4.4+ (mapfile -d)
  local -a array_files=() # array for storing files to scan
  local pat_exclusions

  # Inputs: arg1: dir
  checkPlumbing "$@"

  # Assemble file list. Paths travel NUL-delimited so that whitespace or
  # newlines in file names survive; the dot is escaped so that only a real
  # ".ots" suffix (any letter case) is excluded.
  pat_exclusions='\.ots$'
  mapfile -d '' -t array_files < <(
    find "$1" -type f -print0 | grep -zEiv -- "$pat_exclusions" | shuf -z
  )

  if (( ${#array_files[@]} == 0 )); then
    yell "STATUS:No files to scan in:$1"
    return 0
  fi

  # Run tesseract. The list is fed on stdin rather than after ':::' so its
  # size is not bounded by the command-line length limit.
  printf '%s\0' "${array_files[@]}" \
    | parallel --null --jobs="100%" runTesseract '{}'
} # main program
+
# Export the helper functions so that the shells started by parallel can run
# runTesseract and the reporting helpers it calls.
export -f yell die must runTesseract

main "$@"