#!/bin/bash #Desc: A custom wrapper for running tesseract recursively on a directory # Version: 0.0.1 # Depends: Debian: tesseract 4.1.1 # Depends: BK-2020-03: checkapp() 0.1.1, checkfile() 0.1.2, checkdir() 0.1.2, displayMissing() 1.0.0 declare -Ag appRollCall # Associative array for storing app status declare -Ag fileRollCall # Associative array for storing file status declare -Ag dirRollCall # Associative array for storing dir status yell() { echo "$0: $*" >&2; } # print script path and all args to stderr die() { yell "$*"; exit 111; } # same as yell() but non-zero exit status must() { "$@" || die "cannot $*"; } # runs args as command, reports args if command fails checkapp() { # Desc: If arg is a command, save result in assoc array 'appRollCall' # Usage: checkapp arg1 arg2 arg3 ... # Version: 0.1.1 # Input: global assoc. array 'appRollCall' # Output: adds/updates key(value) to global assoc array 'appRollCall' # Depends: bash 5.0.3 local returnState #===Process Args=== for arg in "$@"; do if command -v "$arg" 1>/dev/null 2>&1; then # Check if arg is a valid command appRollCall[$arg]="true"; if ! [ "$returnState" = "false" ]; then returnState="true"; fi; else appRollCall[$arg]="false"; returnState="false"; fi; done; #===Determine function return code=== if [ "$returnState" = "true" ]; then return 0; else return 1; fi; } # Check that app exists checkfile() { # Desc: If arg is a file path, save result in assoc array 'fileRollCall' # Usage: checkfile arg1 arg2 arg3 ... # Version: 0.1.2 # Input: global assoc. array 'fileRollCall' # Output: adds/updates key(value) to global assoc array 'fileRollCall'; # Output: returns 0 if app found, 1 otherwise # Depends: bash 5.0.3 local returnState #===Process Args=== for arg in "$@"; do if [ -f "$arg" ]; then fileRollCall["$arg"]="true"; if ! [ "$returnState" = "false" ]; then returnState="true"; fi; elif [ -z "$arg" ]; then fileRollCall["(no name)"]="false"; returnState="false"; else fileRollCall["$arg"]="false"; returnState="false"; fi; done; #===Determine function return code=== if [ "$returnState" = "true" ]; then return 0; else return 1; fi; } # Check that file exists checkdir() { # Desc: If arg is a dir path, save result in assoc array 'dirRollCall' # Usage: checkdir arg1 arg2 arg3 ... # Version 0.1.2 # Input: global assoc. array 'dirRollCall' # Output: adds/updates key(value) to global assoc array 'dirRollCall'; # Output: returns 0 if all args are dirs; 1 otherwise # Depends: Bash 5.0.3 local returnState #===Process Args=== for arg in "$@"; do if [ -z "$arg" ]; then dirRollCall["(Unspecified Dirname(s))"]="false"; returnState="false"; elif [ -d "$arg" ]; then dirRollCall["$arg"]="true"; if ! [ "$returnState" = "false" ]; then returnState="true"; fi else dirRollCall["$arg"]="false"; returnState="false"; fi done #===Determine function return code=== if [ "$returnState" = "true" ]; then return 0; else return 1; fi } # Check that dir exists displayMissing() { # Desc: Displays missing apps, files, and dirs # Usage: displayMissing # Version 1.0.0 # Input: associative arrays: appRollCall, fileRollCall, dirRollCall # Output: stderr: messages indicating missing apps, file, or dirs # Output: returns exit code 0 if nothing missing; 1 otherwise # Depends: bash 5, checkAppFileDir() local missingApps value appMissing missingFiles fileMissing local missingDirs dirMissing #==BEGIN Display errors== #===BEGIN Display Missing Apps=== missingApps="Missing apps :"; #for key in "${!appRollCall[@]}"; do echo "DEBUG:$key => ${appRollCall[$key]}"; done for key in "${!appRollCall[@]}"; do value="${appRollCall[$key]}"; if [ "$value" = "false" ]; then #echo "DEBUG:Missing apps: $key => $value"; missingApps="$missingApps""$key "; appMissing="true"; fi; done; if [ "$appMissing" = "true" ]; then # Only indicate if an app is missing. echo "$missingApps" 1>&2; fi; unset value; #===END Display Missing Apps=== #===BEGIN Display Missing Files=== missingFiles="Missing files:"; #for key in "${!fileRollCall[@]}"; do echo "DEBUG:$key => ${fileRollCall[$key]}"; done for key in "${!fileRollCall[@]}"; do value="${fileRollCall[$key]}"; if [ "$value" = "false" ]; then #echo "DEBUG:Missing files: $key => $value"; missingFiles="$missingFiles""$key "; fileMissing="true"; fi; done; if [ "$fileMissing" = "true" ]; then # Only indicate if an app is missing. echo "$missingFiles" 1>&2; fi; unset value; #===END Display Missing Files=== #===BEGIN Display Missing Directories=== missingDirs="Missing dirs:"; #for key in "${!dirRollCall[@]}"; do echo "DEBUG:$key => ${dirRollCall[$key]}"; done for key in "${!dirRollCall[@]}"; do value="${dirRollCall[$key]}"; if [ "$value" = "false" ]; then #echo "DEBUG:Missing dirs: $key => $value"; missingDirs="$missingDirs""$key "; dirMissing="true"; fi; done; if [ "$dirMissing" = "true" ]; then # Only indicate if an dir is missing. echo "$missingDirs" 1>&2; fi; unset value; #===END Display Missing Directories=== #==END Display errors== #==BEGIN Determine function return code=== if [ "$appMissing" == "true" ] || [ "$fileMissing" == "true" ] || [ "$dirMissing" == "true" ]; then return 1; else return 0; fi #==END Determine function return code=== } # Display missing apps, files, dirs checkPlumbing() { if ! checkapp tesseract parallel grep shuf find; then displayMissing; die "FATAL: Missing apps."; fi; if [[ ! -d "$1" ]]; then die "FATAL:Not a dir:$1"; fi; }; # Check dependencies and expected inputs runTesseract() { # Depends: Debian: tesseract 4.1.1 # Input: arg1: file path # Check input file path if [[ ! -f "$1" ]]; then yell "File not found:$1"; return 1; fi; # Print file path printf "File path:%s\n" "$1"; # Run tesseract if ! tesseract "$1" stdout 2>/dev/random; then yell "STATUS:Tesseract exited with error."; fi; }; # Run tesseract main() { declare -a array_files; # array for storing files to scan # Inputs: arg1: dir checkPlumbing "$@"; # Assemble file list pat_exclusions=".ots$"; while read -r line; do array_files+=("$line"); done < <(find "$1" -type f | grep -Eiv "$pat_exclusions" | shuf); # Run tesseract parallel --jobs="100%" runTesseract '{}' ::: "${array_files[@]}"; }; # main program export -f yell die must runTesseract; main "$@";