#!/bin/bash
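# Desc: Runs OpenAI Whisper on the media files found under the input dir
# Usage: ./transcribe_whisper.sh DIR CUDA_NUM   (e.g. ./transcribe_whisper.sh ./media 3)
# Input: arg1  input dir (searched for media files up to `fdepth` levels deep)
#        arg2  CUDA graphics card number (zero-indexed)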
# Version: 0.4.0
# Depends: whisper ( https://github.com/openai/whisper )

# Settings consumed by find_flist()

# Case-insensitive `find -iregex` pattern selecting media files by extension.
# Update according to `find . -type f | grep -Eo "\.([[:alnum:]])+$" | sort -u`
firegex='.+\(aac\|aif\|aiff\|flac\|m4a\|m4b\|mkv\|mp3\|mp4\|ogg\|opus\|wav\)$'

# Size threshold handed to `find -size +N`; only larger files are listed.
# default: minimum "10k"
fsize='10k'

# Value handed to `find -maxdepth`.
fdepth='10'

# Desc: Writes one line to stderr: the script path ($0), a colon, then all args
#       joined into a single string.
yell() {
    printf '%s\n' "$0: $*" >&2;
}
# Desc: Reports all args through yell(), then terminates the current shell
#       with exit status 111.
die() {
    yell "$*";
    exit 111;
}
# Desc: Runs its args as a command; if that command fails, calls die() with
#       "cannot " followed by the args.
must() {
    if ! "$@"; then
        die "cannot $*";
    fi;
}
checkInt() {
    # Desc: Checks if arg is a non-negative decimal integer (digits 0-9 only)
    # Usage: checkInt arg
    # Input: arg1: string to test
    # Output: - return code 0 (if arg consists solely of one or more digits)
    #         - return code 1 (otherwise; this includes the empty string,
    #           signed values such as "-1" and decimals such as "1.5")
    #         - calls die() if not given exactly one argument
    # Example: if ! checkInt "$arg"; then echo "not int"; fi;
    # Version: 0.0.2

    # The pattern lives in a local variable so that it does not leak into the
    # caller's scope; it is expanded unquoted below so `=~` treats it as a
    # regular expression rather than a literal string.
    local re_digits='^[0-9]+$';

    #===Process Arg===
    if [[ $# -ne 1 ]]; then
        die "ERROR:Invalid number of arguments:$#";
    fi;

    #===Determine function return code===
    # The exit status of the test is the function's return code.
    [[ "$1" =~ $re_digits ]];
} # Checks if arg is integer
find_flist() {
    # Desc: Lists, one path per line on stdout, the regular files below a dir
    #       that satisfy the script's find settings
    # Input: arg1:          path to dir
    #        var:  fdepth   passed to find -maxdepth
    #        var:  firegex  passed to find -iregex
    #        var:  fsize    passed to find -size (as +"$fsize")
    # Output: return code 1, and no output, if arg1 is not a directory;
    #         find is run through must(), which calls die() if find fails

    # Guard: nothing to list unless arg1 is an existing directory.
    [[ -d "$1" ]] || return 1;

    # Assemble the find arguments as an array so each value stays one word.
    local -a find_args=(
        "$1"
        -maxdepth "$fdepth"
        -type f
        -iregex "$firegex"
        -size +"$fsize"
    );
    must find "${find_args[@]}";
}; # print file list to stdout from dir with script parameters
main() {
    # Desc: Runs whisper, in random order, on every file that find_flist()
    #       reports for the input dir; outputs are written into the dir
    #       that contains each source file.
    # Input: arg1: dir_in    input dir
    #        arg2: cuda_num  cuda GPU index
    #        var:  fdepth    (find_flist) find depth
    #        var:  firegex   (find_flist) pattern find iregex
    #        var:  fsize     (find_flist) find size
    # Output: stderr: STATUS/ERROR lines for each file
    #         files:  whisper outputs (--output_format all) beside each source
    #                 file; FILE.tmp exists only while FILE is being processed
    # Exit: calls die() (exit 111) if cuda_num is not an integer, if dir_in is
    #       not a directory, or if no `whisper` command is available.
    # Note: a FILE.tmp marker can still be left behind if the script itself is
    #       killed while whisper is running; delete it by hand to retry FILE.
    local dir_in cuda_num line dir_out ftmp stem candidate conflict;
    dir_in="$1";
    cuda_num="$2";

    #===Validate inputs before touching any file===
    if ! checkInt "$cuda_num"; then die "FATAL:No graphics card selected."; fi;
    if [[ ! -d "$dir_in" ]]; then die "FATAL:Input dir not found:$dir_in"; fi;
    if ! command -v whisper 1>/dev/null 2>&1; then die "FATAL:whisper not found."; fi;

    # `IFS=` keeps leading/trailing whitespace of each path intact; without it
    # `read` would trim the path and whisper would be given a wrong file name.
    while IFS= read -r line; do
        printf '%s\n' "STATUS:Processing:$line" 1>&2;
        SECONDS=0; # restart bash's timer so $SECONDS is this file's run time
        dir_out="$(dirname -- "$line")";
        ftmp="$line".tmp;
        stem="${line%.*}"; # path without its final extension

        # Skip the file if it is marked in-progress or any output already exists.
        conflict="";
        for candidate in "$ftmp" "$stem".srt "$stem".vtt "$stem".txt "$stem".tsv "$stem".json; do
            if [[ -f "$candidate" ]]; then conflict="$candidate"; break; fi;
        done;
        if [[ -n "$conflict" ]]; then
            yell "STATUS:Skipping:$line";
            continue;
        fi;

        # Create the in-progress marker; whisper writes into the same dir, so
        # if the marker cannot be created there is no point in running it.
        if ! touch -- "$ftmp"; then
            yell "ERROR:Cannot create marker:$ftmp";
            continue;
        fi;
        yell "STATUS:No conflicts detected.";

        # stdin is detached so that nothing started here can consume the file
        # list that feeds this loop.
        if whisper "$line" \
                   --model large-v3 \
                   --output_format all \
                   --output_dir "$dir_out" \
                   --language en \
                   --device cuda:"$cuda_num" </dev/null; then
            printf '%s\n' "STATUS:$SECONDS:Finished:$line" 1>&2;
        else
            yell "ERROR:$SECONDS:whisper failed:$line";
        fi;

        # Remove the marker on success AND on failure: a marker left behind by
        # a failed run would make every later run skip this file.
        rm -f -- "$ftmp";
    done < <(find_flist "$dir_in" | shuf);
}; # main program
# Mark the helper functions for export so that child bash processes inherit
# them. NOTE(review): nothing visible in this file starts a child bash that
# would need them -- confirm whether this is still required.
export -f yell die must find_flist

# Entry point: forward the script's command-line arguments unchanged.
main "$@"


