feat(bktess):Add bash wrapper for tesseract img OCR
[BK-2020-03.git] / user / bktess
#!/bin/bash
# Desc: A custom wrapper for running tesseract recursively on a directory
# Version: 0.0.1
# Depends: Debian: tesseract 4.1.1
# Depends: BK-2020-03: checkapp() 0.1.1, checkfile() 0.1.2, checkdir() 0.1.2, displayMissing() 1.0.0
6
# Global roll-call tables. They are filled in by checkapp(), checkfile() and
# checkdir() and read back by displayMissing(). Keys are the names/paths that
# were checked; values are the strings "true" or "false".
declare -Ag appRollCall  # Associative array for storing app status
declare -Ag fileRollCall # Associative array for storing file status
declare -Ag dirRollCall  # Associative array for storing dir status
10
11
# yell: print the script path ($0) followed by all args (joined into one
#       string) to stderr. Nothing is written to stdout.
yell() { printf '%s: %s\n' "$0" "$*" >&2; }
# die: same as yell(), then terminate the current shell with exit status 111.
die() { yell "$*"; exit 111; }
# must: run the args as a command; if it fails, report "cannot <args>" via
#       die() (which exits with status 111).
must() { "$@" || die "cannot $*"; }
checkapp() {
  # Desc: If arg is a command, save result in assoc array 'appRollCall'
  # Usage: checkapp arg1 arg2 arg3 ...
  # Version: 0.1.1
  # Input: global assoc. array 'appRollCall'
  # Output: adds/updates key(value) to global assoc array 'appRollCall';
  #         an empty arg is recorded as "false" under the key "(no name)"
  # Output: returns 0 if every arg is a command; 1 otherwise (also when
  #         called with no args)
  # Depends: bash 5.0.3
  local arg returnState # 'arg' is local so the caller's variables are untouched

  #===Process Args===
  for arg in "$@"; do
    if [ -z "$arg" ]; then
      # An empty string is not a valid associative-array subscript.
      appRollCall["(no name)"]="false"; returnState="false";
    elif command -v "$arg" 1>/dev/null 2>&1; then # Check if arg is a valid command
      appRollCall["$arg"]="true";
      # Once any arg has failed, the overall state stays "false".
      if ! [ "$returnState" = "false" ]; then returnState="true"; fi;
    else
      appRollCall["$arg"]="false"; returnState="false";
    fi;
  done;

  #===Determine function return code===
  if [ "$returnState" = "true" ]; then
    return 0;
  else
    return 1;
  fi;
} # Check that app exists
checkfile() {
  # Desc: If arg is a file path, save result in assoc array 'fileRollCall'
  # Usage: checkfile arg1 arg2 arg3 ...
  # Version: 0.1.2
  # Input: global assoc. array 'fileRollCall'
  # Output: adds/updates key(value) to global assoc array 'fileRollCall';
  #         an empty arg is recorded as "false" under the key "(no name)"
  # Output: returns 0 if every arg is an existing regular file; 1 otherwise
  #         (also when called with no args)
  # Depends: bash 5.0.3
  local arg returnState # 'arg' is local so the caller's variables are untouched

  #===Process Args===
  for arg in "$@"; do
    if [ -f "$arg" ]; then
      fileRollCall["$arg"]="true";
      # Once any arg has failed, the overall state stays "false".
      if ! [ "$returnState" = "false" ]; then returnState="true"; fi;
    elif [ -z "$arg" ]; then
      # An empty string is not a valid associative-array subscript.
      fileRollCall["(no name)"]="false"; returnState="false";
    else
      fileRollCall["$arg"]="false"; returnState="false";
    fi;
  done;

  #===Determine function return code===
  if [ "$returnState" = "true" ]; then
    return 0;
  else
    return 1;
  fi;
} # Check that file exists
checkdir() {
  # Desc: If arg is a dir path, save result in assoc array 'dirRollCall'
  # Usage: checkdir arg1 arg2 arg3 ...
  # Version 0.1.2
  # Input: global assoc. array 'dirRollCall'
  # Output: adds/updates key(value) to global assoc array 'dirRollCall';
  #         an empty arg is recorded as "false" under the key
  #         "(Unspecified Dirname(s))"
  # Output: returns 0 if all args are dirs; 1 otherwise (also when called
  #         with no args)
  # Depends: Bash 5.0.3
  local arg returnState # 'arg' is local so the caller's variables are untouched

  #===Process Args===
  for arg in "$@"; do
    if [ -z "$arg" ]; then
      # An empty string is not a valid associative-array subscript.
      dirRollCall["(Unspecified Dirname(s))"]="false"; returnState="false";
    elif [ -d "$arg" ]; then
      dirRollCall["$arg"]="true";
      # Once any arg has failed, the overall state stays "false".
      if ! [ "$returnState" = "false" ]; then returnState="true"; fi
    else
      dirRollCall["$arg"]="false"; returnState="false";
    fi
  done

  #===Determine function return code===
  if [ "$returnState" = "true" ]; then
    return 0;
  else
    return 1;
  fi
} # Check that dir exists
displayMissing() {
  # Desc: Displays missing apps, files, and dirs
  # Usage: displayMissing
  # Version 1.0.0
  # Input: associative arrays: appRollCall, fileRollCall, dirRollCall
  #        (entries whose value is the string "false" count as missing)
  # Output: stderr: messages indicating missing apps, file, or dirs;
  #         one line per category, printed only if that category has
  #         at least one missing entry
  # Output: returns exit code 0 if nothing missing; 1 otherwise
  # Depends: bash 5; arrays populated by checkapp(), checkfile(), checkdir()
  local key value
  local missingApps appMissing
  local missingFiles fileMissing
  local missingDirs dirMissing

  #==BEGIN Display errors==
  #===BEGIN Display Missing Apps===
  missingApps="Missing apps :";
  for key in "${!appRollCall[@]}"; do
    value="${appRollCall[$key]}";
    if [ "$value" = "false" ]; then
      missingApps="$missingApps""$key ";
      appMissing="true";
    fi;
  done;
  if [ "$appMissing" = "true" ]; then # Only indicate if an app is missing.
    echo "$missingApps" 1>&2;
  fi;
  unset value;
  #===END Display Missing Apps===

  #===BEGIN Display Missing Files===
  missingFiles="Missing files:";
  for key in "${!fileRollCall[@]}"; do
    value="${fileRollCall[$key]}";
    if [ "$value" = "false" ]; then
      missingFiles="$missingFiles""$key ";
      fileMissing="true";
    fi;
  done;
  if [ "$fileMissing" = "true" ]; then # Only indicate if a file is missing.
    echo "$missingFiles" 1>&2;
  fi;
  unset value;
  #===END Display Missing Files===

  #===BEGIN Display Missing Directories===
  missingDirs="Missing dirs:";
  for key in "${!dirRollCall[@]}"; do
    value="${dirRollCall[$key]}";
    if [ "$value" = "false" ]; then
      missingDirs="$missingDirs""$key ";
      dirMissing="true";
    fi;
  done;
  if [ "$dirMissing" = "true" ]; then # Only indicate if a dir is missing.
    echo "$missingDirs" 1>&2;
  fi;
  unset value;
  #===END Display Missing Directories===

  #==END Display errors==
  #==BEGIN Determine function return code===
  if [ "$appMissing" = "true" ] || [ "$fileMissing" = "true" ] || [ "$dirMissing" = "true" ]; then
    return 1;
  else
    return 0;
  fi
  #==END Determine function return code===
} # Display missing apps, files, dirs
checkPlumbing() {
  # Desc: Check dependencies and expected inputs
  # Usage: checkPlumbing arg1
  # Input: arg1: path that must be an existing directory
  # Output: stderr: list of missing apps via displayMissing() and a FATAL
  #         message via die()
  # Output: does not return if an app is missing or arg1 is not a dir
  #         (die() exits the shell)
  # Depends: checkapp(), displayMissing(), die()
  if ! checkapp tesseract parallel grep shuf find; then
    displayMissing;
    die "FATAL: Missing apps.";
  fi;
  if [[ ! -d "$1" ]]; then die "FATAL:Not a dir:$1"; fi;
}; # Check dependencies and expected inputs
runTesseract() {
  # Desc: Print a file's path, then the text tesseract recognises in it
  # Usage: runTesseract arg1
  # Input: arg1: file path
  # Output: stdout: "File path:<arg1>" followed by tesseract's stdout
  # Output: stderr: a message via yell() if the file is missing or
  #         tesseract exits non-zero
  # Output: returns 1 if arg1 is not a regular file; 0 otherwise (a
  #         tesseract failure is reported but does not change the status)
  # Depends: Debian: tesseract 4.1.1; yell()

  # Check input file path
  if [[ ! -f "$1" ]]; then yell "File not found:$1"; return 1; fi;

  # Print file path
  printf "File path:%s\n" "$1";

  # Run tesseract. Its own stderr output is intentionally discarded; send it
  # to /dev/null (not /dev/random, which is an entropy device, not a sink).
  if ! tesseract "$1" stdout 2>/dev/null; then
    yell "STATUS:Tesseract exited with error.";
  fi;
}; # Run tesseract
main() {
  # Desc: Run runTesseract() in parallel on every file found under a dir
  # Usage: main arg1
  # Input: arg1: dir to search recursively
  # Output: stdout/stderr of the runTesseract() jobs started by parallel
  # Output: returns 0 without starting any job if no file is left to scan;
  #         otherwise returns the exit status of parallel
  # Depends: checkPlumbing(), runTesseract(), yell(), find, grep, shuf,
  #          GNU parallel
  # Note: paths are handled one per line, so file names that contain a
  #       newline are not supported.
  local -a array_files=(); # array for storing files to scan
  local pat_exclusions line;

  # Inputs: arg1: dir
  checkPlumbing "$@";

  # Assemble file list (in random order).
  # The dot is escaped so that only a literal ".ots" suffix is excluded
  # (case-insensitively); an unescaped '.' would also drop e.g. "robots".
  pat_exclusions='\.ots$';
  # IFS= keeps leading/trailing whitespace in file names intact.
  while IFS= read -r line; do
    array_files+=("$line");
  done < <(find "$1" -type f | grep -Eiv "$pat_exclusions" | shuf);

  # Nothing to do: avoid calling parallel with an empty argument list.
  if (( ${#array_files[@]} == 0 )); then
    yell "STATUS:No files to scan in:$1";
    return 0;
  fi;

  # Run tesseract
  parallel --jobs="100%" runTesseract '{}' ::: "${array_files[@]}";
}; # main program
204
# Export the helper functions to the environment so that child bash
# processes can call them; main() hands runTesseract (which uses yell) to
# parallel as the command to run.
export -f yell die must runTesseract;

# Entry point: pass all command-line arguments through to main().
main "$@";