# Desc: A custom wrapper for running tesseract recursively on a directory
# Depends: Debian: tesseract 4.1.1
# Depends: BK-2020-03: checkapp() 0.1.1, checkfile() 0.1.2, checkdir() 0.1.2, displayMissing() 1.0.0
# Global roll-call tables. Each maps a checked name to the string "true"
# (found) or "false" (missing); they are filled in by checkapp(), checkfile()
# and checkdir() and read back by displayMissing().
declare -Ag appRollCall;  # Associative array for storing app status
declare -Ag fileRollCall; # Associative array for storing file status
declare -Ag dirRollCall;  # Associative array for storing dir status
# Desc: Print the script path followed by all arguments to stderr.
# Usage: yell word1 word2 ...
# Output: stderr: "<script path>: <args joined by spaces>"
yell() { echo "$0: $*" >&2; }
# Desc: Report all arguments via yell(), then terminate the shell with
#       exit status 111.
# Usage: die word1 word2 ...
# Depends: yell()
die() { yell "$*"; exit 111; }
# Desc: Run the arguments as a command; if it fails, report "cannot <args>"
#       through die() (which exits the shell).
# Usage: must command arg1 arg2 ...
# Depends: die()
must() { "$@" || die "cannot $*"; }
checkapp() {
    # Desc: If arg is a command, save result in assoc array 'appRollCall'
    # Usage: checkapp arg1 arg2 arg3 ...
    # Input: global assoc. array 'appRollCall'
    # Output: adds/updates key(value) to global assoc array 'appRollCall';
    #         value is "true" when `command -v` resolves the arg, else "false"
    # Output: returns 0 if every arg is an available command; 1 otherwise
    #         (including when called with no args)
    local returnState arg

    #===Process Args===
    for arg in "$@"; do
        if [ -z "$arg" ]; then
            # An empty string cannot be an associative-array subscript;
            # record it under a placeholder key, as checkfile() does.
            appRollCall["(no name)"]="false"; returnState="false";
        elif command -v "$arg" 1>/dev/null 2>&1; then # Check if arg is a valid command
            appRollCall["$arg"]="true";
            # A single earlier failure is sticky: never flip "false" back.
            if ! [ "$returnState" = "false" ]; then returnState="true"; fi;
        else
            appRollCall["$arg"]="false"; returnState="false";
        fi;
    done;

    #===Determine function return code===
    if [ "$returnState" = "true" ]; then
        return 0;
    else
        return 1;
    fi;
} # Check that app exists
checkfile() {
    # Desc: If arg is a file path, save result in assoc array 'fileRollCall'
    # Usage: checkfile arg1 arg2 arg3 ...
    # Input: global assoc. array 'fileRollCall'
    # Output: adds/updates key(value) to global assoc array 'fileRollCall';
    #         an empty arg is recorded under the key "(no name)"
    # Output: returns 0 if all args are regular files; 1 otherwise
    #         (including when called with no args)
    local returnState arg

    #===Process Args===
    for arg in "$@"; do
        if [ -f "$arg" ]; then
            fileRollCall["$arg"]="true";
            # A single earlier failure is sticky: never flip "false" back.
            if ! [ "$returnState" = "false" ]; then returnState="true"; fi;
        elif [ -z "$arg" ]; then
            fileRollCall["(no name)"]="false"; returnState="false";
        else
            fileRollCall["$arg"]="false"; returnState="false";
        fi;
    done;

    #===Determine function return code===
    if [ "$returnState" = "true" ]; then
        return 0;
    else
        return 1;
    fi;
} # Check that file exists
checkdir() {
    # Desc: If arg is a dir path, save result in assoc array 'dirRollCall'
    # Usage: checkdir arg1 arg2 arg3 ...
    # Input: global assoc. array 'dirRollCall'
    # Output: adds/updates key(value) to global assoc array 'dirRollCall';
    #         an empty arg is recorded under "(Unspecified Dirname(s))"
    # Output: returns 0 if all args are dirs; 1 otherwise
    #         (including when called with no args)
    local returnState arg

    #===Process Args===
    for arg in "$@"; do
        if [ -z "$arg" ]; then
            dirRollCall["(Unspecified Dirname(s))"]="false"; returnState="false";
        elif [ -d "$arg" ]; then
            dirRollCall["$arg"]="true";
            # A single earlier failure is sticky: never flip "false" back.
            if ! [ "$returnState" = "false" ]; then returnState="true"; fi
        else
            dirRollCall["$arg"]="false"; returnState="false";
        fi;
    done;

    #===Determine function return code===
    if [ "$returnState" = "true" ]; then
        return 0;
    else
        return 1;
    fi;
} # Check that dir exists
displayMissing() {
    # Desc: Displays missing apps, files, and dirs
    # Usage: displayMissing
    # Input: associative arrays: appRollCall, fileRollCall, dirRollCall
    # Output: stderr: messages indicating missing apps, file, or dirs
    #         (one line per category, only for categories with a "false" entry)
    # Output: returns exit code 0 if nothing missing; 1 otherwise
    # Depends: bash 5; arrays as populated by checkapp(), checkfile(), checkdir()
    local missingApps value appMissing missingFiles fileMissing
    local missingDirs dirMissing key

    #==BEGIN Display errors==
    #===BEGIN Display Missing Apps===
    missingApps="Missing apps :";
    for key in "${!appRollCall[@]}"; do
        value="${appRollCall[$key]}";
        if [ "$value" = "false" ]; then
            missingApps="$missingApps""$key ";
            appMissing="true";
        fi;
    done;
    if [ "$appMissing" = "true" ]; then # Only indicate if an app is missing.
        echo "$missingApps" 1>&2;
    fi;
    #===END Display Missing Apps===

    #===BEGIN Display Missing Files===
    missingFiles="Missing files:";
    for key in "${!fileRollCall[@]}"; do
        value="${fileRollCall[$key]}";
        if [ "$value" = "false" ]; then
            missingFiles="$missingFiles""$key ";
            fileMissing="true";
        fi;
    done;
    if [ "$fileMissing" = "true" ]; then # Only indicate if a file is missing.
        echo "$missingFiles" 1>&2;
    fi;
    #===END Display Missing Files===

    #===BEGIN Display Missing Directories===
    missingDirs="Missing dirs:";
    for key in "${!dirRollCall[@]}"; do
        value="${dirRollCall[$key]}";
        if [ "$value" = "false" ]; then
            missingDirs="$missingDirs""$key ";
            dirMissing="true";
        fi;
    done;
    if [ "$dirMissing" = "true" ]; then # Only indicate if a dir is missing.
        echo "$missingDirs" 1>&2;
    fi;
    #===END Display Missing Directories===
    #==END Display errors==

    #==BEGIN Determine function return code===
    if [ "$appMissing" == "true" ] || [ "$fileMissing" == "true" ] || [ "$dirMissing" == "true" ]; then
        return 1;
    else
        return 0;
    fi;
    #==END Determine function return code===
} # Display missing apps, files, dirs
# Abort unless every external command this script relies on is installed;
# displayMissing() lists the absent ones on stderr before die() exits.
if ! checkapp tesseract parallel grep shuf find; then
    displayMissing; die "FATAL: Missing apps."; fi;
# arg1 must name an existing directory (the tree that will be scanned).
if [[ ! -d "$1" ]]; then die "FATAL:Not a dir:$1"; fi;
175 }; # Check dependencies and expected inputs
runTesseract() {
    # Desc: Print the given file's path, then run tesseract on it with the
    #       recognised text written to stdout.
    # Depends: Debian: tesseract 4.1.1
    # Depends: yell()
    # Input: arg1: file path
    # Output: stdout: "File path:<arg1>" followed by tesseract's output
    # Output: stderr: a status line if the file is missing or tesseract fails
    # Output: returns 1 if arg1 is not a regular file; 0 otherwise (a
    #         tesseract failure is reported but does not change the status)

    # Check input file path
    if [[ ! -f "$1" ]]; then yell "File not found:$1"; return 1; fi;

    printf "File path:%s\n" "$1";

    # Discard tesseract's own diagnostics; /dev/null is the proper sink.
    if ! tesseract "$1" stdout 2>/dev/null; then
        yell "STATUS:Tesseract exited with error."; fi;
} # Run tesseract on one file
# Export the helpers before invoking parallel: the shells that parallel
# spawns can only call functions that are already exported at that point.
export -f yell die must runTesseract;

declare -a array_files; # array for storing files to scan

# Collect every regular file under arg1, skipping names that end in ".ots"
# (case-insensitive; the dot is escaped so it matches a literal '.'), in
# random order. IFS= keeps leading/trailing whitespace in paths intact.
# NOTE: paths containing newlines are still split, as the list is line-based.
pat_exclusions='\.ots$';
while IFS= read -r line; do
    array_files+=("$line");
done < <(find "$1" -type f | grep -Eiv "$pat_exclusions" | shuf);

# Run one runTesseract job per file, using as many job slots as CPU cores.
parallel --jobs="100%" runTesseract '{}' ::: "${array_files[@]}";