#!/usr/bin/env bash
# Desc: Copies random audio files
# Usage: bk-copy-rand-music [dir SOURCE] [dir DEST] [int DURATION] ([int BYTES])
# Version: 0.1.1
# Depends: BK-2020-03: bkshuf v0.1.0

# Roll-call tables filled by checkapp(), checkfile() and checkdir():
#   key = app name / file path / dir path, value = "true" or "false"
declare -Ag appRollCall fileRollCall dirRollCall

# Adjustable parameters
declare -a music_codecs=("vorbis" "aac" "mp3" "flac" "opus") # whitelist of valid codec_names ffprobe might return
max_filename_length="255"    # max output filename length
min_file_duration="10"       # minimum duration per music file
max_file_duration="3600"     # maximum duration per music file
min_file_size="100000"       # minimum size per music file (bytes)
max_file_size="100000000"    # maximum size per music file (bytes)
siz_dest="600000000"         # default destination size limit: 600 MB
max_find_depth="10"          # max find depth

# Load env vars (bkshuf defaults for typical music albums).
# A value already present in the environment is left untouched.
[[ -v BKSHUF_PARAM_LINEC ]] || export BKSHUF_PARAM_LINEC=1000000
[[ -v BKSHUF_PARAM_GSIZE ]] || export BKSHUF_PARAM_GSIZE=10

yell() {
    # Desc: Writes one line to stderr: the script path ($0), a colon, then
    #       all args joined by spaces
    # Usage: yell arg1 arg2 ...
    printf '%s\n' "$0: $*" >&2;
} # print script path and all args to stderr
die() {
    # Desc: Reports all args on stderr through yell(), then ends the
    #       current shell with exit status 111
    # Usage: die arg1 arg2 ...
    # Depends: yell()
    yell "$*";
    exit 111;
} # same as yell() but non-zero exit status
must() {
    # Desc: Runs its args as a command; if the command fails, hands
    #       "cannot <args>" to die()
    # Usage: must cmd arg1 arg2 ...
    # Depends: die()
    if ! "$@"; then
        die "cannot $*";
    fi;
} # runs args as command, reports args if command fails
checkapp() {
    # Desc: For each arg, records in assoc array 'appRollCall' whether
    #       'command -v' can resolve it
    # Usage: checkapp arg1 arg2 arg3 ...
    # Version: 0.1.1
    # Input: args: command names to look up
    #        global assoc. array 'appRollCall'
    # Output: adds/updates key(value) to global assoc array 'appRollCall'
    #         (value "true" if found, "false" otherwise)
    # Output: returns 0 if every arg is a command; 1 otherwise
    #         (also 1 when called without args)
    # Depends: bash 5.0.3
    local arg returnState # 'arg' is local so the loop cannot clobber a caller's variable

    #===Process Args===
    for arg in "$@"; do
        if command -v "$arg" 1>/dev/null 2>&1; then # Check if arg is a valid command
            appRollCall["$arg"]="true";
            # An earlier miss is sticky: only promote to "true" if nothing failed yet
            if ! [ "$returnState" = "false" ]; then returnState="true"; fi;
        else
            appRollCall["$arg"]="false"; returnState="false";
        fi;
    done;

    #===Determine function return code===
    if [ "$returnState" = "true" ]; then
        return 0;
    else
        return 1;
    fi;
} # Check that app exists
checkfile() {
    # Desc: For each arg, records in assoc array 'fileRollCall' whether it
    #       is the path of an existing regular file ('-f' test)
    # Usage: checkfile arg1 arg2 arg3 ...
    # Version: 0.1.1
    # Input: args: file paths to test
    #        global assoc. array 'fileRollCall'
    # Output: adds/updates key(value) to global assoc array 'fileRollCall'
    #         (value "true" if the file exists, "false" otherwise)
    # Output: returns 0 if every arg is a file; 1 otherwise
    #         (also 1 when called without args)
    # Depends: bash 5.0.3
    local arg returnState # 'arg' is local so the loop cannot clobber a caller's variable

    #===Process Args===
    for arg in "$@"; do
        if [ -f "$arg" ]; then
            fileRollCall["$arg"]="true";
            # An earlier miss is sticky: only promote to "true" if nothing failed yet
            if ! [ "$returnState" = "false" ]; then returnState="true"; fi;
        else
            fileRollCall["$arg"]="false"; returnState="false";
        fi;
    done;

    #===Determine function return code===
    if [ "$returnState" = "true" ]; then
        return 0;
    else
        return 1;
    fi;
} # Check that file exists
checkdir() {
    # Desc: For each arg, records in assoc array 'dirRollCall' whether it
    #       is the path of an existing directory ('-d' test)
    # Usage: checkdir arg1 arg2 arg3 ...
    # Version 0.1.2
    # Input: args: dir paths to test
    #        global assoc. array 'dirRollCall'
    # Output: adds/updates key(value) to global assoc array 'dirRollCall'
    #         (value "true" if the dir exists, "false" otherwise); an empty
    #         arg is recorded under the key "(Unspecified Dirname(s))"
    # Output: returns 0 if all args are dirs; 1 otherwise
    #         (also 1 when called without args)
    # Depends: Bash 5.0.3
    local arg returnState # 'arg' is local so the loop cannot clobber a caller's variable

    #===Process Args===
    for arg in "$@"; do
        if [ -z "$arg" ]; then
            # Empty string cannot serve as the key, so use a placeholder
            dirRollCall["(Unspecified Dirname(s))"]="false"; returnState="false";
        elif [ -d "$arg" ]; then
            dirRollCall["$arg"]="true";
            # An earlier miss is sticky: only promote to "true" if nothing failed yet
            if ! [ "$returnState" = "false" ]; then returnState="true"; fi
        else
            dirRollCall["$arg"]="false"; returnState="false";
        fi
    done

    #===Determine function return code===
    if [ "$returnState" = "true" ]; then
        return 0;
    else
        return 1;
    fi
} # Check that dir exists
displayMissing() {
    # Desc: Displays missing apps, files, and dirs
    # Usage: displayMissing
    # Version 1.0.0
    # Input: associative arrays: appRollCall, fileRollCall, dirRollCall
    #        (entries whose value is "false" are reported as missing)
    # Output: stderr: one line per category that has missing entries;
    #         each line is a label followed by the missing keys, each key
    #         followed by a space
    # Output: returns exit code 0 if nothing missing; 1 otherwise
    # Depends: bash 5, arrays as filled by checkapp(), checkfile(), checkdir()
    local key # loop variable kept local so callers' 'key' is not clobbered
    local missingApps missingFiles missingDirs
    local appMissing fileMissing dirMissing

    #==BEGIN Display errors==
    #===BEGIN Display Missing Apps===
    missingApps="Missing apps  :";
    for key in "${!appRollCall[@]}"; do
        if [ "${appRollCall[$key]}" = "false" ]; then
            missingApps="$missingApps""$key ";
            appMissing="true";
        fi;
    done;
    if [ "$appMissing" = "true" ]; then  # Only indicate if an app is missing.
        printf '%s\n' "$missingApps" 1>&2;
    fi;
    #===END Display Missing Apps===

    #===BEGIN Display Missing Files===
    missingFiles="Missing files:";
    for key in "${!fileRollCall[@]}"; do
        if [ "${fileRollCall[$key]}" = "false" ]; then
            missingFiles="$missingFiles""$key ";
            fileMissing="true";
        fi;
    done;
    if [ "$fileMissing" = "true" ]; then  # Only indicate if a file is missing.
        printf '%s\n' "$missingFiles" 1>&2;
    fi;
    #===END Display Missing Files===

    #===BEGIN Display Missing Directories===
    missingDirs="Missing dirs:";
    for key in "${!dirRollCall[@]}"; do
        if [ "${dirRollCall[$key]}" = "false" ]; then
            missingDirs="$missingDirs""$key ";
            dirMissing="true";
        fi;
    done;
    if [ "$dirMissing" = "true" ]; then  # Only indicate if a dir is missing.
        printf '%s\n' "$missingDirs" 1>&2;
    fi;
    #===END Display Missing Directories===
    #==END Display errors==

    #==BEGIN Determine function return code===
    if [ "$appMissing" == "true" ] || [ "$fileMissing" == "true" ] || [ "$dirMissing" == "true" ]; then
        return 1;
    else
        return 0;
    fi
    #==END Determine function return code===
} # Display missing apps, files, dirs
showUsage() {
    # Desc: Display script usage information
    # Usage: showUsage
    # Version 0.0.1
    # Input: none
    # Output: stdout: fixed help text (the here-doc delimiter is quoted, so
    #         nothing inside it is expanded)
    # Depends: GNU-coreutils 8.30 (cat)
    # Note: DEPENDENCIES lists bkshuf because main() refuses to run
    #       when 'checkapp ffprobe bkshuf' reports it missing.
    cat <<'EOF'

    DESCRIPTION:
      This script may be used to copy a random selection of files containing
      audio tracks from SOURCE to DEST.

    USAGE:
      bk-copy-rand-music [dir SOURCE] [dir DEST] [int DURATION] (int BYTES)

    EXAMPLE:
      bk-copy-rand-music ~/Music /tmp/music-sample 3600
      bk-copy-rand-music ~/Music /tmp/music-sample 3600 680000000

    DEPENDENCIES:
      bkshuf v0.1.0 (BK-2020-03)
      ffprobe
      GNU Coreutils 8.30

    ENVIRONMENT VARIABLES
      BKSHUF_PARAM_LINEC (see `bkshuf` in BK-2020-03)
      BKSHUF_PARAM_GSIZE (see `bkshuf` in BK-2020-03)
EOF
} # Display information on how to use this script.
check_parsable_audio_ffprobe() {
    # Desc: Checks if ffprobe can report an audio codec name for a file
    # Usage: check_parsable_audio_ffprobe [path FILE]
    # Version: 0.0.1
    # Input: arg1: file path
    # Output: exit code 0 if ffprobe exits successfully AND prints a
    #         non-empty codec_name for the audio stream selection; 1 otherwise
    #         stderr: whatever ffprobe itself reports at '-v error' level
    # Depends: ffprobe, die()
    # Note: The codec name is only tested for being non-empty here; it is
    #       not compared against any whitelist.
    local file_in ffprobe_out

    if [[ $# -ne 1 ]]; then die "ERROR:Invalid number of args:$#"; fi;

    file_in="$1";

    # Probe once: the assignment's status is ffprobe's own exit status,
    # and the captured text tells whether any audio stream was listed.
    if ! ffprobe_out="$(ffprobe -v error -select_streams a -show_entries stream=codec_name -of default=nokey=1:noprint_wrappers=1 "$file_in")"; then
        return 1; # ffprobe itself failed
    fi;

    # Fail if ffprobe returns no result
    if [[ -z $ffprobe_out ]]; then
        return 1;
    fi;

    return 0;
} # Checks if file has valid codec name using ffprobe
get_audio_format() {
    # Desc: Gets audio format of file as string
    # Usage: get_audio_format arg1
    # Depends: ffprobe
    # Version: 0.0.1
    # Input: arg1: input file path
    # Output: stdout: codec name (only if ffprobe succeeded and the name is
    #                 a single alphanumeric token)
    #         stderr: whatever ffprobe itself reports at '-v error' level
    #         exit code 0 if a codec name was printed; 1 otherwise
    # Example: get_audio_format myvideo.mp4
    #   Note: Would return "opus" if full ffprobe report had 'Audio: opus, 48000 Hz, stereo, fltp'
    # Note: Not tested with videos containing multiple video streams
    # Ref/Attrib: [1] https://stackoverflow.com/questions/5618363/is-there-a-way-to-use-ffmpeg-to-determine-the-encoding-of-a-file-before-transcod
    #             [2] https://stackoverflow.com/questions/44123532/how-to-find-out-the-file-extension-for-extracting-audio-tracks-with-ffmpeg-and-p#comment88464070_50723126
    local audio_format file_in pattern;
    file_in="$1";

    # Get audio format; see [1].
    ## Return error if ffprobe itself exited on error. The failure must end
    ## the function here so that output printed before the failure cannot
    ## be reported as a valid format.
    if ! audio_format="$(ffprobe -v error -select_streams a -show_entries stream=codec_name -of default=nokey=1:noprint_wrappers=1 "$file_in")"; then
        return 1;
    fi;

    ## Return error if audio format is incorrectly formatted
    ## (e.g. reject if empty or if it contains spaces, newlines or punctuation)
    pattern="^[[:alnum:]]+$"; # alphanumeric string with no spaces
    if [[ ! $audio_format =~ $pattern ]]; then
        return 1;
    fi;

    # Report audio format
    printf '%s\n' "$audio_format";
    return 0;
} # Get audio format as stdout
get_media_length() {
    # Desc: Prints the duration that ffprobe reports for a media
    #       container ('format=duration' entry)
    # Usage: get_media_length arg1
    # Input:  arg1: path to file
    # Output: stdout: seconds (float), as printed by ffprobe
    #         calls die() if arg1 is not an existing regular file
    # Depends: ffprobe 4.1.8, die()
    # Ref/Attrib: [1] How to get video duration in seconds? https://superuser.com/a/945604
    local media_path="$1";

    # Guard clause: refuse anything that is not a regular file
    [[ -f $media_path ]] || die "ERROR:Not a file:$media_path";

    ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 "$media_path";
} # Get media container length in seconds via stdout
checkInt() {
    # Desc: Checks if arg is a non-negative integer (one or more digits
    #       0-9; no sign, no spaces, no decimal point)
    # Usage: checkInt arg
    # Input: arg: string to test
    # Output: - return code 0 (if arg is integer)
    #         - return code 1 (if arg is not integer)
    #         - calls die() if not given exactly one arg
    # Example: if ! checkInt $arg; then echo "not int"; fi;
    # Version: 0.0.1
    # Depends: die()
    local int_regex # kept local so no helper variable leaks to the caller

    #===Process Arg===
    if [[ $# -ne 1 ]]; then
        die "ERROR:Invalid number of arguments:$#";
    fi;

    int_regex='^[0-9]+$'; # Regular Expression to test

    #===Determine function return code===
    if [[ $1 =~ $int_regex ]]; then
        return 0;
    else
        return 1;
    fi;
} # Checks if arg is integer
checkIsInArray() {
    # Desc: Checks if input arg is element in array (exact string match)
    # Usage: checkIsInArray arg1 arg2
    # Version: 0.0.1
    # Input: arg1: test string
    #        arg2: array (expanded by the caller into the remaining args)
    # Output: exit code 0 if test string is in array; 1 otherwise
    #         (also 1 if the array is empty)
    # Example: checkIsInArray "foo" "${myArray[@]}"
    # Note: Comparison is whole-string equality. A prefix of an element
    #       (e.g. "mp" vs "mp3") or an empty test string does not count as
    #       a member unless the array holds exactly that string.
    # Ref/Attrib: [1] How do I check if variable is an array? https://stackoverflow.com/a/27254437
    #             [2] How to pass an array as function argument? https://askubuntu.com/a/674347
    local string_test element

    string_test="$1";

    # Every arg after the first is an array element; see [2]
    for element in "${@:2}"; do
        # Quoted right-hand side: literal comparison, no glob matching
        if [[ $element == "$string_test" ]]; then
            return 0; # found; no need to look further
        fi;
    done;

    return 1;
} # Check if string is element in array
main() {
    # Desc: Main program. Builds a shuffled list of files under SOURCE,
    #       keeps those that pass the codec/duration/size filters until the
    #       duration or size budget would be exceeded, then copies the kept
    #       files into DEST under numbered names and logs each copy.
    # Input: arg1: path to source tree
    #        arg2: path to destination tree
    #        arg3: cumulative duration (seconds) of audio files in destination tree
    #        arg4: cumulative size (bytes) of audio files in destination tree (optional)
    #        assoc arrays: appRollCall, fileRollCall, dirRollCall
    #        env.var: BKSHUF_PARAM_LINEC (bkshuf)
    #                 BKSHUF_PARAM_GSIZE (bkshuf)
    #        arrays: music_codecs
    #        vars: max_filename_length, min_file_duration, max_file_duration,
    #                min_file_size, max_file_size, siz_dest, max_find_depth
    # Output: files copied into arg2 as "<num>_<fingerprint>..<basename>"
    #         lines appended to "<arg2>/COPY.log"
    #         stderr: status, notices and error messages
    #         global var siz_dest is overwritten when arg4 is given
    # Depends: yell(), die(), must(), showUsage(), checkInt(), checkapp(),
    #          checkdir() 0.1.2, displayMissing() 1.0.0,
    #          check_parsable_audio_ffprobe(), get_audio_format(),
    #          get_media_length(), checkIsInArray(), GNU Coreutils 8.30
    #          BK-2020-03: bkshuf v0.1.0
    local arg1 arg2 arg3 arg4 dur_dest dir_source dir_dest
    local line dur siz n file_count path_candfile file_format
    local dur_cand siz_cand dur_cand_w siz_cand_w
    local num num_w num_fmt log_fmt path_log_output copy_count
    local rest fdur fsize fpath fpath_can file_basename fingerprint
    local file_name path_output
    declare -a list_files # array for files to be considered
    declare -a list_copy # array for files to be copied (string: "$dur,$fsize,$path")

    # Parse args
    arg1="$1";
    arg2="$2";
    arg3="$3";
    arg4="$4";
    if ! { [[ $# -eq 3 ]] || [[ $# -eq 4 ]]; }; then
        showUsage;
        die "ERROR:Invalid number of args:$#"; fi;

    # Check env vars
    if ! checkInt "$BKSHUF_PARAM_LINEC"; then
        die "FATAL:Not an int:BKSHUF_PARAM_LINEC:$BKSHUF_PARAM_LINEC"; fi;
    if ! checkInt "$BKSHUF_PARAM_GSIZE"; then
        die "FATAL:Not an int:BKSHUF_PARAM_GSIZE:$BKSHUF_PARAM_GSIZE"; fi;

    ## Check duration
    if checkInt "$arg3"; then
        # Force base 10 so that leading zeros (e.g. "0100", "08") are not
        # treated as octal by later arithmetic comparisons
        dur_dest="$((10#$arg3))";
    else
        die "FATAL:Duration (seconds) not an int:$arg3"
    fi;

    ## Check size
    if [[ -n "$arg4" ]]; then
        if checkInt "$arg4"; then
            siz_dest="$((10#$arg4))"; # base 10 forced; see duration above
        else
            die "FATAL:Size (bytes) not an int:$arg4";
        fi;
    fi;

    ## Check directories
    if checkdir "$arg1" "$arg2"; then
        dir_source="$arg1";
        dir_dest="$arg2";
    else
        # Not fatal here: displayMissing below lists the bad dirs and the
        # script stops there.
        yell "ERROR:Directory error";
    fi;

    ## Check apps
    checkapp ffprobe bkshuf;

    if ! displayMissing; then
        showUsage;
        die "ERROR:Check missing resources.";
    fi;

    yell "STATUS:Working...";

    # Populate list_files array (one path per line, sorted)
    mapfile -t list_files < <(find -L "$dir_source" -maxdepth "$max_find_depth" -type f | sort);

    # Test and add random elements of list_files to list_copy
    dur=0; # Initialize duration
    siz=0; # Initialize size
    n=0; # Initialize loop counter
    dur_cand_w=1; # Init duration digit width counter
    siz_cand_w=1; # Init size digit width counter
    ## Get element count of list_files array
    file_count="${#list_files[@]}";
    # IFS= keeps leading/trailing whitespace that is part of a path
    while IFS= read -r line && \
            [[ $dur -le $dur_dest ]] && \
            [[ $siz -le $siz_dest ]] && \
            [[ $n -le $file_count ]]; do
        path_candfile="$line"; # path of candidate file

        ### Check if has valid codec
        if ! check_parsable_audio_ffprobe "$path_candfile"; then continue; fi; # reject

        ### Check if desired codec
        ### (a failed lookup is rejected outright instead of passing an
        ###  empty string on to the whitelist test)
        if ! file_format="$(get_audio_format "$path_candfile")"; then continue; fi; # reject
        if ! checkIsInArray "$file_format" "${music_codecs[@]}"; then continue; fi; # reject

        ### Check duration
        ### Validation comes first: a non-numeric value (e.g. empty or
        ### "N/A") must never reach the arithmetic below.
        dur_cand="$(get_media_length "$path_candfile")";
        dur_cand="${dur_cand%%.*}"; # convert float to int
        if ! checkInt "$dur_cand"; then continue; fi; # reject
        if [[ "$dur_cand" -lt "$min_file_duration" ]]; then continue; fi; # reject
        if [[ "$dur_cand" -gt "$max_file_duration" ]]; then continue; fi; # reject
        ### Only an otherwise acceptable file may end the selection
        if [[ "$((dur + dur_cand))" -gt "$dur_dest" ]]; then break; fi; # no more

        ### Check size
        siz_cand="$(du -b "$path_candfile" | awk '{ print $1 }')"; # size in bytes
        if ! checkInt "$siz_cand"; then continue; fi; # reject
        if [[ "$siz_cand" -lt "$min_file_size" ]]; then continue; fi; # reject
        if [[ "$siz_cand" -gt "$max_file_size" ]]; then continue; fi; # reject
        if [[ "$((siz + siz_cand))" -gt "$siz_dest" ]]; then break; fi; # no more

        ### Track widest duration and size among accepted files
        ### (used to pad the log columns)
        if [[ ${#dur_cand} -gt $dur_cand_w ]]; then dur_cand_w="${#dur_cand}"; fi;
        if [[ ${#siz_cand} -gt $siz_cand_w ]]; then siz_cand_w="${#siz_cand}"; fi;

        ### Add candfile to array:
        ###   list_copy (array with "duration,size,path")
        list_copy+=("$dur_cand,$siz_cand,$path_candfile"); # for copying with order

        ### Update total duration $dur and total size $siz
        dur="$((dur + dur_cand))";
        siz="$((siz + siz_cand))";

        ((n++));
    done < <(printf "%s\n" "${list_files[@]}" | bkshuf);

    n=0; # Initialize loop counter
    copy_count="${#list_copy[@]}";
    num_w="${#copy_count}"; # digits needed for the file number
    num_fmt="%0""$num_w""d";
    log_fmt="%s,%s,%""$dur_cand_w""d,%""$siz_cand_w""d,%s\n"; # e.g. "%s,%s,%3d,%5d,%s\n" if dur_cand_w=3 and siz_cand_w=5
    path_log_output="$dir_dest"/COPY.log;
    printf "num,fingerprint,duration,size,original_path\n" >> "$path_log_output";

    # Copy files in list_copy to dir_dest.
    # Iterating the array directly means an empty selection copies nothing
    # (no blank pseudo-entry) and paths keep their exact whitespace.
    for line in "${list_copy[@]}"; do
        ## Split "duration,size,path"; the path may itself contain commas
        fdur="${line%%,*}";
        rest="${line#*,}";
        fsize="${rest%%,*}";
        fpath="${rest#*,}";

        ## Get basename of path
        file_basename="$(basename "$fpath")";

        ## Get b2sum fingerprint (for different files that share basename)
        ## '-l32' requests a 32-bit digest, i.e. 8 hexadecimal characters
        fingerprint="$(b2sum -l32 "$fpath" | awk '{print $1}' )";

        ## Form output filename
        printf -v num "$num_fmt" "$n";
        file_name="$num"_"$fingerprint".."$file_basename";
        file_name="${file_name:0:$max_filename_length}"; # Limit filename length (e.g. Windows has max of 255 characters)

        ## Form output path
        path_output="$dir_dest"/"$file_name";

        ## Copy
        must cp "$fpath" "$path_output" && yell "NOTICE:Copied ($fdur seconds): $fpath ";

        ## Append log
        fpath_can="$(readlink -f "$fpath")"; # resolve symlinks to canonical path
        printf "$log_fmt" "$num" "$fingerprint" "$fdur" "$fsize" "$fpath_can" >> "$path_log_output";

        ((n++));
    done;

    # Report total duration and size
    yell "NOTICE:Total duration (seconds):$dur";
    yell "NOTICE:Total size (bytes):$siz";

} # Main program

# Entry point: run main() with the script's command-line args passed
# through unchanged (each arg stays a separate word).
main "$@";

# Author: Steven Baltakatei Sandoval
# License: GPLv3+

# bkshuf v0.1.0
#   Author: Steven Baltakatei Sandoval
#   License: GPLv3+
#   URL: https://gitlab.com/baltakatei/baltakatei-exdev/-/blob/b9e8b771e985fcdf26ba8b9ccb8e31b62da757d3/unitproc/bkshuf