#!/bin/bash
#Desc: A custom wrapper for running tesseract recursively on a directory
# Version: 0.0.1
# Depends: Debian: tesseract 4.1.1
# Depends: BK-2020-03: checkapp() 0.1.1, checkfile() 0.1.2, checkdir() 0.1.2, displayMissing() 1.0.0

declare -Ag appRollCall # Associative array for storing app status
declare -Ag fileRollCall # Associative array for storing file status
declare -Ag dirRollCall # Associative array for storing dir status


yell() { echo "$0: $*" >&2; } # print script path and all args to stderr
die() { yell "$*"; exit 111; } # same as yell() but non-zero exit status
must() { "$@" || die "cannot $*"; } # runs args as command, reports args if command fails
checkapp() {
    # Desc: If arg is a command, save result in assoc array 'appRollCall'
    # Usage: checkapp arg1 arg2 arg3 ...
    # Version: 0.1.1
    # Input: global assoc. array 'appRollCall'
    # Output: adds/updates key(value) to global assoc array 'appRollCall'
    # Depends: bash 5.0.3
    local returnState    

    #===Process Args===
    for arg in "$@"; do
	if command -v "$arg" 1>/dev/null 2>&1; then # Check if arg is a valid command
	    appRollCall[$arg]="true";
	    if ! [ "$returnState" = "false" ]; then returnState="true"; fi;
	else
	    appRollCall[$arg]="false"; returnState="false";
	fi;
    done;

    #===Determine function return code===
    if [ "$returnState" = "true" ]; then
	return 0;
    else
	return 1;
    fi;
} # Check that app exists
checkfile() {
    # Desc: If arg is a file path, save result in assoc array 'fileRollCall'
    # Usage: checkfile arg1 arg2 arg3 ...
    # Version: 0.1.2
    # Input: global assoc. array 'fileRollCall'
    # Output: adds/updates key(value) to global assoc array 'fileRollCall';
    # Output: returns 0 if app found, 1 otherwise
    # Depends: bash 5.0.3
    local returnState

    #===Process Args===
    for arg in "$@"; do
	if [ -f "$arg" ]; then
	    fileRollCall["$arg"]="true";
	    if ! [ "$returnState" = "false" ]; then returnState="true"; fi;
        elif [ -z "$arg" ]; then
            fileRollCall["(no name)"]="false"; returnState="false";
	else
	    fileRollCall["$arg"]="false"; returnState="false";
	fi;
    done;
    
    #===Determine function return code===
    if [ "$returnState" = "true" ]; then
	return 0;
    else
	return 1;
    fi;
} # Check that file exists
checkdir() {
    # Desc: If arg is a dir path, save result in assoc array 'dirRollCall'
    # Usage: checkdir arg1 arg2 arg3 ...
    # Version 0.1.2
    # Input: global assoc. array 'dirRollCall'
    # Output: adds/updates key(value) to global assoc array 'dirRollCall';
    # Output: returns 0 if all args are dirs; 1 otherwise
    # Depends: Bash 5.0.3
    local returnState

    #===Process Args===
    for arg in "$@"; do
	if [ -z "$arg" ]; then
	    dirRollCall["(Unspecified Dirname(s))"]="false"; returnState="false";
	elif [ -d "$arg" ]; then
	    dirRollCall["$arg"]="true";
	    if ! [ "$returnState" = "false" ]; then returnState="true"; fi
	else
	    dirRollCall["$arg"]="false"; returnState="false";
	fi
    done
    
    #===Determine function return code===
    if [ "$returnState" = "true" ]; then
	return 0;
    else
	return 1;
    fi
} # Check that dir exists
displayMissing() {
    # Desc: Displays missing apps, files, and dirs
    # Usage: displayMissing
    # Version 1.0.0
    # Input: associative arrays: appRollCall, fileRollCall, dirRollCall
    # Output: stderr: messages indicating missing apps, file, or dirs
    # Output: returns exit code 0 if nothing missing; 1 otherwise
    # Depends: bash 5, checkAppFileDir()
    local missingApps value appMissing missingFiles fileMissing
    local missingDirs dirMissing

    #==BEGIN Display errors==
    #===BEGIN Display Missing Apps===
    missingApps="Missing apps  :";
    #for key in "${!appRollCall[@]}"; do echo "DEBUG:$key => ${appRollCall[$key]}"; done
    for key in "${!appRollCall[@]}"; do
	value="${appRollCall[$key]}";
	if [ "$value" = "false" ]; then
	    #echo "DEBUG:Missing apps: $key => $value";
	    missingApps="$missingApps""$key ";
	    appMissing="true";
	fi;
    done;
    if [ "$appMissing" = "true" ]; then  # Only indicate if an app is missing.
	echo "$missingApps" 1>&2;
    fi;
    unset value;
    #===END Display Missing Apps===

    #===BEGIN Display Missing Files===
    missingFiles="Missing files:";
    #for key in "${!fileRollCall[@]}"; do echo "DEBUG:$key => ${fileRollCall[$key]}"; done
    for key in "${!fileRollCall[@]}"; do
	value="${fileRollCall[$key]}";
	if [ "$value" = "false" ]; then
	    #echo "DEBUG:Missing files: $key => $value";
	    missingFiles="$missingFiles""$key ";
	    fileMissing="true";
	fi;
    done;
    if [ "$fileMissing" = "true" ]; then  # Only indicate if an app is missing.
	echo "$missingFiles" 1>&2;
    fi;
    unset value;
    #===END Display Missing Files===

    #===BEGIN Display Missing Directories===
    missingDirs="Missing dirs:";
    #for key in "${!dirRollCall[@]}"; do echo "DEBUG:$key => ${dirRollCall[$key]}"; done
    for key in "${!dirRollCall[@]}"; do
	value="${dirRollCall[$key]}";
	if [ "$value" = "false" ]; then
	    #echo "DEBUG:Missing dirs: $key => $value";
	    missingDirs="$missingDirs""$key ";
	    dirMissing="true";
	fi;
    done;
    if [ "$dirMissing" = "true" ]; then  # Only indicate if an dir is missing.
	echo "$missingDirs" 1>&2;
    fi;
    unset value;
    #===END Display Missing Directories===

    #==END Display errors==
    #==BEGIN Determine function return code===
    if [ "$appMissing" == "true" ] || [ "$fileMissing" == "true" ] || [ "$dirMissing" == "true" ]; then
	return 1;
    else
	return 0;
    fi
    #==END Determine function return code===
} # Display missing apps, files, dirs
checkPlumbing() {
    if ! checkapp tesseract parallel grep shuf find; then
        displayMissing; die "FATAL: Missing apps."; fi;
    if [[ ! -d "$1" ]]; then die "FATAL:Not a dir:$1"; fi;
}; # Check dependencies and expected inputs
runTesseract() {
    # Depends: Debian: tesseract 4.1.1
    # Input: arg1:  file path

    # Check input file path
    if [[ ! -f "$1" ]]; then yell "File not found:$1"; return 1; fi;

    # Print file path    
    printf "File path:%s\n" "$1";
    
    # Run tesseract
    if ! tesseract "$1" stdout 2>/dev/random; then yell "STATUS:Tesseract exited with error."; fi;
}; # Run tesseract
main() {
    declare -a array_files; # array for storing files to scan    
    
    # Inputs: arg1: dir
    checkPlumbing "$@";

    # Assemble file list
    pat_exclusions=".ots$";
    while read -r line; do
        array_files+=("$line");
    done < <(find "$1" -type f | grep -Eiv "$pat_exclusions" | shuf);

    # Run tesseract
    parallel --jobs="100%" runTesseract '{}' ::: "${array_files[@]}";
}; # main program

export -f yell die must runTesseract;

main "$@";