--- /dev/null
+#!/usr/bin/env bash
+# Desc: Copies random audio files
+# Usage: bk-copy-rand-music [dir SOURCE] [dir DEST] [int DURATION] ([int BYTES])
+# Version: 0.1.0
+# Depends: BK-2020-03: bkshuf v0.1.0
+
+# Roll-call registries (global associative arrays, value is "true" or "false")
+declare -Ag appRollCall   # keyed by app name; written by checkapp()
+declare -Ag fileRollCall  # keyed by file path; written by checkfile()
+declare -Ag dirRollCall   # keyed by dir path; written by checkdir()
+
+# Adjustable parameters
+declare -a music_codecs=("vorbis" "aac" "mp3" "flac" "opus")  # whitelist of valid codec_names ffprobe might return
+max_filename_length=255   # max output filename length (characters)
+min_file_duration=10      # minimum duration per music file (seconds)
+max_file_duration=3600    # maximum duration per music file (seconds)
+min_file_size=100000      # minimum size per music file (bytes)
+max_file_size=100000000   # maximum size per music file (bytes)
+siz_dest=600000000        # default destination size limit: 600 MB
+max_find_depth=10         # max find depth
+
+
+# yell: write "<script path>: <all args joined by spaces>" to stderr
+yell() {
+    printf '%s\n' "$0: $*" 1>&2
+}
+# die: report all args through yell(), then terminate with exit status 111
+die() {
+    yell "$*"
+    exit 111
+}
+# must: run the args as a command; if it fails, die() with "cannot <args>"
+must() {
+    if ! "$@"; then
+        die "cannot $*"
+    fi
+}
+checkapp() {
+    # Desc: For each arg, records in assoc array 'appRollCall' whether the
+    #       arg resolves to a command (per 'command -v')
+    # Usage: checkapp arg1 arg2 arg3 ...
+    # Version: 0.1.1
+    # Input: global assoc. array 'appRollCall'
+    # Output: adds/updates key(value) to global assoc array 'appRollCall'
+    #         ("true" if the command was found, "false" otherwise)
+    # Output: returns 0 if every arg is a command; 1 otherwise
+    #         (also 1 when called without args)
+    # Depends: bash 5.0.3
+    local arg returnState   # 'arg' is local so the caller's variables are not clobbered
+
+    #===Process Args===
+    for arg in "$@"; do
+        if command -v "$arg" 1>/dev/null 2>&1; then # Check if arg is a valid command
+            appRollCall[$arg]="true";
+            # A single earlier miss keeps the overall state "false"
+            if ! [ "$returnState" = "false" ]; then returnState="true"; fi;
+        else
+            appRollCall[$arg]="false"; returnState="false";
+        fi;
+    done;
+
+    #===Determine function return code===
+    [ "$returnState" = "true" ]
+} # Check that app exists
+checkfile() {
+    # Desc: For each arg, records in assoc array 'fileRollCall' whether the
+    #       arg is the path of an existing regular file ('[ -f ]')
+    # Usage: checkfile arg1 arg2 arg3 ...
+    # Version: 0.1.1
+    # Input: global assoc. array 'fileRollCall'
+    # Output: adds/updates key(value) to global assoc array 'fileRollCall'
+    #         ("true" if the file exists, "false" otherwise)
+    # Output: returns 0 if every arg is a file; 1 otherwise
+    #         (also 1 when called without args)
+    # Depends: bash 5.0.3
+    local arg returnState   # 'arg' is local so the caller's variables are not clobbered
+
+    #===Process Args===
+    for arg in "$@"; do
+        if [ -f "$arg" ]; then
+            fileRollCall["$arg"]="true";
+            # A single earlier miss keeps the overall state "false"
+            if ! [ "$returnState" = "false" ]; then returnState="true"; fi;
+        else
+            fileRollCall["$arg"]="false"; returnState="false";
+        fi;
+    done;
+
+    #===Determine function return code===
+    [ "$returnState" = "true" ]
+} # Check that file exists
+checkdir() {
+    # Desc: For each arg, records in assoc array 'dirRollCall' whether the
+    #       arg is the path of an existing directory ('[ -d ]')
+    # Usage: checkdir arg1 arg2 arg3 ...
+    # Version 0.1.2
+    # Input: global assoc. array 'dirRollCall'
+    # Output: adds/updates key(value) to global assoc array 'dirRollCall';
+    #         an empty arg is recorded under key "(Unspecified Dirname(s))"
+    # Output: returns 0 if all args are dirs; 1 otherwise
+    #         (also 1 when called without args)
+    # Depends: Bash 5.0.3
+    local arg returnState   # 'arg' is local so the caller's variables are not clobbered
+
+    #===Process Args===
+    for arg in "$@"; do
+        if [ -z "$arg" ]; then
+            # An empty string cannot be used as an array key; use a placeholder
+            dirRollCall["(Unspecified Dirname(s))"]="false"; returnState="false";
+        elif [ -d "$arg" ]; then
+            dirRollCall["$arg"]="true";
+            # A single earlier miss keeps the overall state "false"
+            if ! [ "$returnState" = "false" ]; then returnState="true"; fi
+        else
+            dirRollCall["$arg"]="false"; returnState="false";
+        fi
+    done
+
+    #===Determine function return code===
+    [ "$returnState" = "true" ]
+} # Check that dir exists
+_displayMissingList() {
+    # Desc: Private helper of displayMissing(). Writes one line to stderr of
+    #       the form "<label><key> <key> ..." listing every key of the named
+    #       assoc array whose value is "false"; writes nothing if there is none
+    # Usage: _displayMissingList label arrayName
+    # Input: arg1: line prefix (e.g. "Missing apps :")
+    #        arg2: name of a global assoc. array
+    # Output: stderr; returns 1 if at least one key was listed, 0 otherwise
+    # Depends: bash 4.3+ (nameref)
+    local label="$1"
+    local -n _dmRollCall="$2"   # nameref: read the caller-named array in place
+    local key report anyMissing
+
+    report="$label"; anyMissing="false";
+    for key in "${!_dmRollCall[@]}"; do
+        if [ "${_dmRollCall[$key]}" = "false" ]; then
+            report="$report""$key ";
+            anyMissing="true";
+        fi;
+    done;
+
+    if [ "$anyMissing" = "true" ]; then # Only print when something is missing
+        echo "$report" 1>&2;
+        return 1;
+    fi;
+    return 0;
+} # List keys marked "false" in one roll-call array
+displayMissing() {
+    # Desc: Displays missing apps, files, and dirs
+    # Usage: displayMissing
+    # Version 1.0.0
+    # Input: associative arrays: appRollCall, fileRollCall, dirRollCall
+    # Output: stderr: messages indicating missing apps, file, or dirs
+    #         (one line per category, in the order apps, files, dirs)
+    # Output: returns exit code 0 if nothing missing; 1 otherwise
+    # Depends: bash 5, _displayMissingList()
+    local anyMissing
+
+    anyMissing="false";
+    # All three categories are always reported; '||' only records the result
+    _displayMissingList "Missing apps :" appRollCall  || anyMissing="true";
+    _displayMissingList "Missing files:" fileRollCall || anyMissing="true";
+    _displayMissingList "Missing dirs:"  dirRollCall  || anyMissing="true";
+
+    #==Determine function return code===
+    [ "$anyMissing" = "false" ]
+} # Display missing apps, files, dirs
+showUsage() {
+ # Desc: Display script usage information
+ # Usage: showUsage
+ # Version 0.0.1
+ # Input: none
+ # Output: stdout: description, usage line, examples and dependency list
+ # Depends: GNU-coreutils 8.30 (cat)
+ # Note: the heredoc delimiter is quoted ('EOF'), so the text is printed literally (no expansion)
+ cat <<'EOF'
+
+ DESCRIPTION:
+ This script may be used to copy a random selection of files containing
+ audio tracks from SOURCE to DEST.
+
+ USAGE:
+ bk-copy-rand-music [dir SOURCE] [dir DEST] [int DURATION] (int BYTES)
+
+ EXAMPLE:
+ bk-copy-rand-music ~/Music /tmp/music-sample 3600
+ bk-copy-rand-music ~/Music /tmp/music-sample 3600 680000000
+
+ DEPENDENCIES:
+ ffprobe
+ GNU Coreutils 8.30
+EOF
+} # Display information on how to use this script.
+check_parsable_audio_ffprobe() {
+    # Desc: Checks if ffprobe succeeds on a file and reports at least one
+    #       audio stream codec name for it
+    # Usage: check_parsable_audio_ffprobe [path FILE]
+    # Version: 0.0.1
+    # Input: arg1: file path
+    # Output: exit code 0 if ffprobe exits successfully with non-empty
+    #         codec_name output; 1 otherwise. Nothing is written to stdout;
+    #         ffprobe's own stderr is discarded.
+    #         Calls die() (exit 111) if not given exactly one arg.
+    # Note: the codec name itself is not validated here, only its presence
+    # Depends: ffprobe, die()
+    local file_in ffprobe_out
+
+    if [[ $# -ne 1 ]]; then die "ERROR:Invalid number of args:$#"; fi;
+
+    file_in="$1";
+
+    # Probe once; the assignment's status is ffprobe's exit status.
+    # stderr is dropped because this predicate is run against arbitrary
+    # (often non-media) files and a rejection is not an error for the caller.
+    if ! ffprobe_out="$(ffprobe -v error -select_streams a -show_entries stream=codec_name -of default=nokey=1:noprint_wrappers=1 "$file_in" 2>/dev/null)"; then
+        return 1;
+    fi;
+
+    # Fail if ffprobe returned no result (no audio stream found)
+    [[ -n $ffprobe_out ]]
+} # Checks if file has valid codec name using ffprobe
+get_audio_format() {
+    # Desc: Gets audio format (codec name) of file as string
+    # Usage: get_audio_format arg1
+    # Depends: ffprobe
+    # Version: 0.0.1
+    # Input: arg1: input file path
+    # Output: stdout: codec name (only if ffprobe succeeded and the name is a
+    #         single alphanumeric word)
+    #         exit code 0 if a codec name was printed; 1 otherwise
+    # Example: get_audio_format myvideo.mp4
+    # Note: Would return "opus" if full ffprobe report had 'Audio: opus, 48000 Hz, stereo, fltp'
+    # Note: Not tested with videos containing multiple video streams
+    # Note: ffprobe output spanning several lines (one codec name per audio
+    #       stream) does not match the single-word pattern and is rejected
+    # Ref/Attrib: [1] https://stackoverflow.com/questions/5618363/is-there-a-way-to-use-ffmpeg-to-determine-the-encoding-of-a-file-before-transcod
+    #             [2] https://stackoverflow.com/questions/44123532/how-to-find-out-the-file-extension-for-extracting-audio-tracks-with-ffmpeg-and-p#comment88464070_50723126
+    local audio_format file_in pattern;
+    file_in="$1";
+
+    # Get audio format with a single probe; see [1]
+    ## Return error if ffprobe itself exited on error
+    if ! audio_format="$(ffprobe -v error -select_streams a -show_entries stream=codec_name -of default=nokey=1:noprint_wrappers=1 "$file_in")"; then
+        return 1;
+    fi;
+
+    ## Return error if audio format is incorrectly formatted (e.g. reject if contains spaces)
+    pattern="^[[:alnum:]]+$"; # alphanumeric string with no spaces
+    if [[ ! $audio_format =~ $pattern ]]; then
+        return 1;
+    fi;
+
+    # Report audio format
+    echo "$audio_format";
+    return 0;
+} # Get audio format as stdout
+get_media_length() {
+    # Desc: Prints the media container duration reported by ffprobe
+    # Usage: get_media_length arg1
+    # Input: arg1: path to file
+    # Output: stdout: seconds (float), exactly as printed by ffprobe
+    #         exit code: that of ffprobe; die() (exit 111) if arg1 is not a file
+    # Depends: ffprobe 4.1.8, die()
+    # Ref/Attrib: [1] How to get video duration in seconds? https://superuser.com/a/945604
+    local media_path="$1"
+
+    # Guard clause: refuse anything that is not an existing regular file
+    [[ -f $media_path ]] || die "ERROR:Not a file:$media_path"
+
+    ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 "$media_path"
+} # Get media container length in seconds via stdout
+checkInt() {
+    # Desc: Checks if arg is a non-negative integer written with decimal
+    #       digits only (no sign, no decimal point, no whitespace)
+    # Usage: checkInt arg
+    # Input: arg: string to test
+    # Output: - return code 0 (if arg is integer)
+    #         - return code 1 (if arg is not integer, including empty string)
+    #         - die() (exit 111) if not given exactly one arg
+    # Example: if ! checkInt $arg; then echo "not int"; fi;
+    # Note: leading zeros are accepted (e.g. "08"); callers doing arithmetic
+    #       on such values should force base 10 (e.g. $((10#$arg)))
+    # Version: 0.0.1
+    # Depends: die()
+    local intPattern   # kept local so no global (formerly RETEST1) is clobbered
+
+    #===Process Arg===
+    if [[ $# -ne 1 ]]; then
+        die "ERROR:Invalid number of arguments:$#";
+    fi;
+
+    intPattern='^[0-9]+$'; # Regular Expression to test
+
+    #===Determine function return code===
+    [[ $1 =~ $intPattern ]]
+} # Checks if arg is integer
+checkIsInArray() {
+    # Desc: Checks if input arg is element in array (exact string equality)
+    # Usage: checkIsInArray arg1 arg2
+    # Version: 0.0.1
+    # Input: arg1: test string
+    #        arg2: array (passed expanded, i.e. as the remaining args)
+    # Output: exit code 0 if test string is in array; 1 otherwise
+    #         (1 as well when the array is empty)
+    # Example: checkIsInArray "foo" "${myArray[@]}"
+    # Note: comparison is whole-string; a prefix such as "mp" does not match
+    #       "mp3", and an empty test string matches only an empty element
+    # Ref/Attrib: [1] How do I check if variable is an array? https://stackoverflow.com/a/27254437
+    #             [2] How to pass an array as function argument? https://askubuntu.com/a/674347
+    local string_test element
+
+    string_test="$1";
+
+    # Elements are args 2..N; see [2]
+    for element in "${@:2}"; do
+        # Quoted right-hand side: literal comparison, no glob matching
+        if [[ $element == "$string_test" ]]; then
+            return 0;
+        fi;
+    done;
+
+    return 1;
+} # Check if string is element in array
+main() {
+    # Desc: Main program. Shuffles the files found under the source tree and
+    #       copies those that pass the codec, duration and size checks into
+    #       the destination dir, never letting the cumulative duration or
+    #       size exceed its budget. One CSV line per copied file is appended
+    #       to DEST/COPY.log.
+    # Input: arg1: path to source tree
+    #        arg2: path to destination tree
+    #        arg3: cumulative duration (seconds) of audio files in destination tree
+    #        arg4: cumulative size (bytes) of audio files in destination tree (optional)
+    #        assoc arrays: appRollCall, fileRollCall, dirRollCall
+    #        env.var: BKSHUF_PARAM_LINEC
+    #                 BKSHUF_PARAM_GSIZE
+    #                 (not referenced here; presumably read by bkshuf - TODO confirm)
+    #        arrays: music_codecs
+    #        vars: max_filename_length, min_file_duration, max_file_duration,
+    #              min_file_size, max_file_size, siz_dest, max_find_depth
+    # Output: files copied into arg2; lines appended to arg2/COPY.log;
+    #         status messages on stderr; global siz_dest overwritten when
+    #         arg4 is given; exits 111 via die() on invalid args or missing
+    #         resources
+    # Depends: yell(), die(), must(), showUsage(), checkInt(), checkapp(),
+    #          checkdir() 0.1.2, displayMissing() 1.0.0, checkIsInArray(),
+    #          check_parsable_audio_ffprobe(), get_audio_format(),
+    #          get_media_length(), ffprobe,
+    #          GNU Coreutils 8.30 (b2sum, basename, cp, readlink, sort, stat),
+    #          find, awk, BK-2020-03: bkshuf v0.1.0
+    local arg1 arg2 arg3 arg4 dur_dest dir_source dir_dest
+    local dur siz n file_count line path_candfile file_format
+    local dur_cand siz_cand dur_cand_w siz_cand_w
+    local copy_count num_w num path_log_output rest
+    local fdur fsize fpath fpath_can file_basename fingerprint file_name path_output
+    local -a list_files=()   # array for files to be considered
+    local -a list_copy_sa=() # simple array for files to be copied (string: "$dur,$siz,$path")
+
+    # Parse args
+    if [[ $# -ne 3 && $# -ne 4 ]]; then showUsage; die "ERROR:Invalid number of args:$#"; fi;
+    arg1="$1";
+    arg2="$2";
+    arg3="$3";
+    arg4="${4:-}";
+
+    ## Check duration (fatal: without a valid budget nothing sensible can be selected)
+    if ! checkInt "$arg3"; then
+        die "ERROR:Duration (seconds) not an int:$arg3";
+    fi;
+    dur_dest="$((10#$arg3))"; # force base 10 so leading zeros are not read as octal
+
+    ## Check size
+    if [[ -n "$arg4" ]]; then
+        if ! checkInt "$arg4"; then
+            die "ERROR:Size (bytes) not an int:$arg4";
+        fi;
+        siz_dest="$((10#$arg4))";
+    fi;
+
+    ## Check directories
+    if checkdir "$arg1" "$arg2"; then
+        dir_source="$arg1";
+        dir_dest="$arg2";
+    else
+        yell "ERROR:Directory error"; # details and exit are handled by displayMissing below
+    fi;
+
+    ## Check apps
+    checkapp ffprobe bkshuf b2sum;
+
+    if ! displayMissing; then
+        showUsage;
+        die "ERROR:Check missing resources.";
+    fi;
+
+    yell "STATUS:Working...";
+
+    # Populate list_files array (mapfile keeps leading/trailing whitespace of each path)
+    mapfile -t list_files < <(find -L "$dir_source" -maxdepth "$max_find_depth" -type f | sort);
+
+    # Test and add random elements of list_files to list_copy_sa
+    dur=0; # Initialize duration
+    siz=0; # Initialize size
+    n=0; # Initialize loop counter (number of accepted files)
+    dur_cand_w=1; # Init duration digit width counter
+    siz_cand_w=1; # Init size digit width counter
+    ## Get element count of list_files array
+    file_count="${#list_files[@]}";
+    while IFS= read -r line &&
+            (( dur <= dur_dest )) &&
+            (( siz <= siz_dest )) &&
+            (( n <= file_count )); do
+        path_candfile="$line"; # path of candidate file
+
+        ### Skip blank lines (printf emits one when list_files is empty)
+        if [[ -z $path_candfile ]]; then continue; fi;
+
+        ### Check if has valid codec
+        if ! check_parsable_audio_ffprobe "$path_candfile"; then continue; fi; # reject
+
+        ### Check if desired codec
+        file_format="$(get_audio_format "$path_candfile")";
+        if ! checkIsInArray "$file_format" "${music_codecs[@]}"; then continue; fi; # reject
+
+        ### Check duration (validate BEFORE any arithmetic: ffprobe may print e.g. "N/A")
+        dur_cand="$(get_media_length "$path_candfile")";
+        dur_cand="${dur_cand%%.*}"; # convert float to int
+        if ! checkInt "$dur_cand"; then continue; fi; # reject
+        dur_cand="$((10#$dur_cand))"; # force base 10
+        if (( dur_cand < min_file_duration )); then continue; fi; # reject
+        if (( dur_cand > max_file_duration )); then continue; fi; # reject
+        if (( dur + dur_cand > dur_dest )); then continue; fi; # reject: would exceed duration budget
+
+        ### Check size
+        ### (stat -L follows symlinks, matching the 'find -L' listing above)
+        siz_cand="$(stat -L -c '%s' -- "$path_candfile")"; # size in bytes
+        if ! checkInt "$siz_cand"; then continue; fi; # reject
+        if (( siz_cand < min_file_size )); then continue; fi; # reject
+        if (( siz_cand > max_file_size )); then continue; fi; # reject
+        if (( siz + siz_cand > siz_dest )); then continue; fi; # reject: would exceed size budget
+
+        ### Track widest accepted duration/size (used to align log columns)
+        if (( ${#dur_cand} > dur_cand_w )); then dur_cand_w="${#dur_cand}"; fi;
+        if (( ${#siz_cand} > siz_cand_w )); then siz_cand_w="${#siz_cand}"; fi;
+
+        ### Add candfile to array list_copy_sa ("duration,size,path")
+        list_copy_sa+=("$dur_cand,$siz_cand,$path_candfile"); # for copying with order
+
+        ### Update total duration $dur and total size $siz
+        dur="$((dur + dur_cand))";
+        siz="$((siz + siz_cand))";
+
+        n="$((n + 1))";
+    done < <(printf "%s\n" "${list_files[@]}" | bkshuf);
+
+    n=0; # Initialize loop counter
+    copy_count="${#list_copy_sa[@]}";
+    num_w="${#copy_count}"; # digits needed to zero-pad the file number
+    path_log_output="$dir_dest"/COPY.log;
+    # The log is appended to, so a header line is added on every run
+    printf "num,fingerprint,duration,size,original_path\n" >> "$path_log_output";
+    # Copy files in list_copy_sa to dir_dest
+    # (iterating the array directly: zero selected files means zero iterations)
+    for line in "${list_copy_sa[@]}"; do
+        yell "DEBUG:line:$line"; # debug
+        ## Split "duration,size,path"; the path may itself contain commas
+        fdur="${line%%,*}";
+        rest="${line#*,}";
+        fsize="${rest%%,*}";
+        fpath="${rest#*,}";
+
+        ## Get basename of path
+        file_basename="$(basename -- "$fpath")";
+
+        ## Get 32-bit (8 hex character) b2sum fingerprint (for different files that share basename)
+        ## Reading via stdin keeps the filename out of b2sum's output
+        fingerprint="$(b2sum -l32 < "$fpath" | awk '{print $1}')";
+
+        ## Form output filename
+        printf -v num '%0*d' "$num_w" "$n";
+        file_name="$num"_"$fingerprint".."$file_basename";
+        file_name="${file_name:0:$max_filename_length}"; # Limit filename length (e.g. Windows has max of 255 characters)
+
+        ## Form output path
+        path_output="$dir_dest"/"$file_name";
+
+        ## Copy
+        must cp -- "$fpath" "$path_output" && yell "NOTICE:Copied ($fdur seconds): $fpath ";
+
+        ## Append log
+        fpath_can="$(readlink -f -- "$fpath")"; # resolve symlinks to canonical path
+        ## '*' takes each column width from the argument list (no variable-built format string)
+        printf '%s,%s,%*d,%*d,%s\n' "$num" "$fingerprint" "$dur_cand_w" "$fdur" "$siz_cand_w" "$fsize" "$fpath_can" >> "$path_log_output";
+
+        n="$((n + 1))";
+    done;
+
+    # Report total duration and size
+    yell "NOTICE:Total duration (seconds):$dur";
+    yell "NOTICE:Total size (bytes):$siz";
+
+} # Main program
+
+# Entry point: forward every command-line argument to main() unchanged
+main "$@"
+
+# Author: Steven Baltakatei Sandoval
+# License: GPLv3+