#!/usr/bin/env bash
# Desc: Copies random audio files
# Usage: bk-copy-rand-music [dir SOURCE] [dir DEST] [int DURATION] ([int BYTES])
# Version: 0.4.0
# Depends: BK-2020-03: bkshuf v0.1.0

declare -Ag appRollCall # Associative array for storing app status
declare -Ag fileRollCall # Associative array for storing file status
declare -Ag dirRollCall # Associative array for storing dir status
declare -a music_codecs # Array for storing valid codec names (e.g. "aac" "mp3")

# Adjustable parameters
music_codecs=("vorbis" "aac" "mp3" "flac" "opus" "eac3"); # whitelist of valid codec_names ffprobe might return
# Blacklist of file extensions for 'grep -Ev' (case-sensitive). Dots are
# escaped so that only real extensions match (an unescaped '.ots$' would also
# reject a file named "robots").
ext_ignore='\.ots$|\.mid$|\.json$|\.gz$|\.jpg$|\.png$|\.asc$|\.pdf$|\.txt$|\.vtt$|\.SUM|\.zip$|\.xz$|\.org$';
max_filename_length="255"; # max output filename length
min_file_duration="30"; # minimum duration per music file
max_file_duration="3600"; # maximum duration per music file
min_file_size="100000"; # minimum size per music file (bytes)
max_file_size="100000000"; # maximum size per music file (bytes)
siz_dest="600000000"; # default destination size limit: 600 MB
max_find_depth="10"; # max find depth

# Load env vars (bkshuf defaults for typical music albums)
if [[ ! -v BKSHUF_PARAM_LINEC ]]; then export BKSHUF_PARAM_LINEC=1000000; fi;
if [[ ! -v BKSHUF_PARAM_GSIZE ]]; then export BKSHUF_PARAM_GSIZE=10; fi;

yell() { printf '%s\n' "$0: $*" >&2; } # print script path and all args to stderr
die() { yell "$*"; exit 111; } # same as yell() but non-zero exit status
must() { "$@" || die "cannot $*"; } # runs args as command, reports args if command fails
checkapp() {
    # Desc: If arg is a command, save result in assoc array 'appRollCall'
    # Usage: checkapp arg1 arg2 arg3 ...
    # Version: 0.1.1
    # Input: global assoc. array 'appRollCall'
    # Output: adds/updates key(value) to global assoc array 'appRollCall'
    # Output: returns 0 if all args are commands; 1 otherwise (also 1 if no args)
    # Depends: bash 5.0.3
    local returnState arg

    #===Process Args===
    for arg in "$@"; do
        if [ -z "$arg" ]; then
            # An empty string cannot be used as an associative array key
            appRollCall["(Unspecified Appname(s))"]="false"; returnState="false";
        elif command -v "$arg" 1>/dev/null 2>&1; then # Check if arg is a valid command
            appRollCall["$arg"]="true";
            # A single earlier failure keeps the overall state "false"
            if ! [ "$returnState" = "false" ]; then returnState="true"; fi;
        else
            appRollCall["$arg"]="false"; returnState="false";
        fi;
    done;

    #===Determine function return code===
    if [ "$returnState" = "true" ]; then return 0; else return 1; fi;
} # Check that app exists
checkfile() {
    # Desc: If arg is a file path, save result in assoc array 'fileRollCall'
    # Usage: checkfile arg1 arg2 arg3 ...
    # Version: 0.1.1
    # Input: global assoc. array 'fileRollCall'
    # Output: adds/updates key(value) to global assoc array 'fileRollCall';
    # Output: returns 0 if all args are files; 1 otherwise (also 1 if no args)
    # Depends: bash 5.0.3
    local returnState arg

    #===Process Args===
    for arg in "$@"; do
        if [ -z "$arg" ]; then
            # An empty string cannot be used as an associative array key
            fileRollCall["(Unspecified Filename(s))"]="false"; returnState="false";
        elif [ -f "$arg" ]; then
            fileRollCall["$arg"]="true";
            if ! [ "$returnState" = "false" ]; then returnState="true"; fi;
        else
            fileRollCall["$arg"]="false"; returnState="false";
        fi;
    done;

    #===Determine function return code===
    if [ "$returnState" = "true" ]; then return 0; else return 1; fi;
} # Check that file exists
checkdir() {
    # Desc: If arg is a dir path, save result in assoc array 'dirRollCall'
    # Usage: checkdir arg1 arg2 arg3 ...
    # Version 0.1.2
    # Input: global assoc. array 'dirRollCall'
    # Output: adds/updates key(value) to global assoc array 'dirRollCall';
    # Output: returns 0 if all args are dirs; 1 otherwise (also 1 if no args)
    # Depends: Bash 5.0.3
    local returnState arg

    #===Process Args===
    for arg in "$@"; do
        if [ -z "$arg" ]; then
            # An empty string cannot be used as an associative array key
            dirRollCall["(Unspecified Dirname(s))"]="false"; returnState="false";
        elif [ -d "$arg" ]; then
            dirRollCall["$arg"]="true";
            if ! [ "$returnState" = "false" ]; then returnState="true"; fi
        else
            dirRollCall["$arg"]="false"; returnState="false";
        fi
    done

    #===Determine function return code===
    if [ "$returnState" = "true" ]; then return 0; else return 1; fi
} # Check that dir exists
_displayMissingGroup() {
    # Desc: Reports the keys of one roll-call array whose value is "false"
    # Usage: _displayMissingGroup [str LABEL] [name ASSOC_ARRAY]
    # Input: arg1: label printed before the missing keys
    #        arg2: name of an associative array (read through a nameref)
    # Output: stderr: one line "LABEL key1 key2 ..." (only if something is missing)
    # Output: returns 1 if at least one key is "false"; 0 otherwise
    # Depends: bash 4.3+ (namerefs)
    local label key missing
    local -n _dmg_roll="$2"
    label="$1"; missing="";

    for key in "${!_dmg_roll[@]}"; do
        if [ "${_dmg_roll[$key]}" = "false" ]; then missing="$missing $key"; fi;
    done;

    if [ -n "$missing" ]; then
        printf '%s\n' "$label$missing" 1>&2;
        return 1;
    fi;
    return 0;
} # Helper of displayMissing()
displayMissing() {
    # Desc: Displays missing apps, files, and dirs
    # Usage: displayMissing
    # Version 1.0.0
    # Input: associative arrays: appRollCall, fileRollCall, dirRollCall
    # Output: stderr: messages indicating missing apps, file, or dirs
    # Output: returns exit code 0 if nothing missing; 1 otherwise
    # Depends: bash 5, checkapp(), checkfile(), checkdir(), _displayMissingGroup()
    local anyMissing
    anyMissing="false";

    # Every group is always evaluated so that all problems are shown at once.
    if ! _displayMissingGroup "Missing apps :" appRollCall; then anyMissing="true"; fi;
    if ! _displayMissingGroup "Missing files:" fileRollCall; then anyMissing="true"; fi;
    if ! _displayMissingGroup "Missing dirs:" dirRollCall; then anyMissing="true"; fi;

    if [ "$anyMissing" = "true" ]; then return 1; else return 0; fi
} # Display missing apps, files, dirs
showUsage() {
    # Desc: Display script usage information
    # Usage: showUsage
    # Version 0.0.1
    # Input: none
    # Output: stdout
    # Depends: GNU-coreutils 8.30 (cat)
    cat <<'EOF'
    DESCRIPTION:
        This script may be used to copy a random selection of files
        containing audio tracks from SOURCE to DEST.

    USAGE:
        bk-copy-rand-music [dir SOURCE] [dir DEST] [int DURATION] ([int BYTES])

    EXAMPLE:
        bk-copy-rand-music ~/Music /tmp/music-sample 3600
        bk-copy-rand-music ~/Music /tmp/music-sample 3600 680000000

    DEPENDENCIES:
        ffprobe
        bkshuf (see BK-2020-03)
        GNU Coreutils 8.30

    ENVIRONMENT VARIABLES:
        BKSHUF_PARAM_LINEC (see `bkshuf` in BK-2020-03)
        BKSHUF_PARAM_GSIZE (see `bkshuf` in BK-2020-03)
EOF
} # Display information on how to use this script.
check_parsable_audio_ffprobe() {
    # Desc: Checks if ffprobe returns valid audio codec name for file
    # Usage: check_parsable_audio_ffprobe [path FILE]
    # Version: 0.0.1
    # Input: arg1: file path
    # Output: exit code 0 if ffprobe succeeds and reports at least one audio
    #         codec name; 1 otherwise
    # Depends: ffprobe, die()
    local file_in ffprobe_out

    if [[ $# -ne 1 ]]; then die "ERROR:Invalid number of args:$#"; fi;
    file_in="$1";

    # One ffprobe run provides both the exit status and the codec list.
    if ! ffprobe_out="$(ffprobe -v error -select_streams a -show_entries stream=codec_name -of default=nokey=1:noprint_wrappers=1 "$file_in" 2>/dev/null)"; then
        return 1; # ffprobe itself failed
    fi;

    # Fail if ffprobe returns no result (no audio stream)
    if [[ -z $ffprobe_out ]]; then return 1; fi;
    return 0;
} # Checks if file has valid codec name using ffprobe
get_audio_format() {
    # Desc: Gets audio format of file as string
    # Usage: get_audio_format arg1
    # Depends: ffprobe
    # Version: 0.0.1
    # Input: arg1: input file path
    # Output: stdout (if valid audio format)
    #         exit code 0 if audio file; 1 otherwise
    # Example: get_audio_format myvideo.mp4
    #   Note: Would return "opus" if full ffprobe report had 'Audio: opus, 48000 Hz, stereo, fltp'
    #   Note: If the file has several audio streams, the codec of the first
    #         one listed by ffprobe is reported.
    #   Note: Not tested with videos containing multiple video streams
    # Ref/Attrib: [1] https://stackoverflow.com/questions/5618363/is-there-a-way-to-use-ffmpeg-to-determine-the-encoding-of-a-file-before-transcod
    #             [2] https://stackoverflow.com/questions/44123532/how-to-find-out-the-file-extension-for-extracting-audio-tracks-with-ffmpeg-and-p#comment88464070_50723126
    local audio_format file_in pattern;

    file_in="$1";

    # Get audio format; return error if ffprobe itself exited on error
    if ! audio_format="$(ffprobe -v error -select_streams a -show_entries stream=codec_name -of default=nokey=1:noprint_wrappers=1 "$file_in" 2>/dev/null)"; then # see [1]
        return 1;
    fi;

    # ffprobe prints one codec name per audio stream; keep the first line only
    audio_format="${audio_format%%$'\n'*}";

    ## Return error if audio format is incorrectly formatted (e.g. reject if contains spaces)
    pattern="^[[:alnum:]]+$"; # alphanumeric string with no spaces
    if [[ $audio_format =~ $pattern ]]; then
        # Report audio format
        printf '%s\n' "$audio_format";
        return 0;
    fi;
    return 1;
} # Get audio format as stdout
get_media_length() {
    # Use ffprobe to get media container length in seconds (float)
    # Usage: get_media_length arg1
    # Input:  arg1: path to file
    # Output: stdout: seconds (float)
    # Depends: ffprobe 4.1.8, die()
    # Ref/Attrib: [1] How to get video duration in seconds? https://superuser.com/a/945604
    local file_in
    file_in="$1";
    if [[ ! -f $file_in ]]; then
        die "ERROR:Not a file:$file_in";
    fi;
    ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 "$file_in";
} # Get media container length in seconds via stdout
checkInt() {
    # Desc: Checks if arg is integer
    # Usage: checkInt arg
    # Input: arg: integer
    # Output: - return code 0 (if arg is a non-negative decimal integer)
    #         - return code 1 (if arg is not integer)
    # Example: if ! checkInt $arg; then echo "not int"; fi;
    # Version: 0.0.1
    # Depends: die()
    local re_int

    #===Process Arg===
    if [[ $# -ne 1 ]]; then
        die "ERROR:Invalid number of arguments:$#";
    fi;

    re_int='^[0-9]+$'; # one or more decimal digits, nothing else
    if [[ $1 =~ $re_int ]]; then return 0; else return 1; fi;
} # Checks if arg is integer
checkIsInArray() {
    # Desc: Checks if input arg is element in array
    # Usage: checkIsInArray arg1 arg2
    # Version: 0.0.1
    # Input: arg1: test string
    #        arg2: array
    # Output: exit code 0 if test string is exactly equal to an element of
    #         the array; 1 otherwise (also 1 if the array is empty)
    # Example: checkIsInArray "foo" "${myArray[@]}"
    # Ref/Attrib: [1] How do I check if variable is an array? https://stackoverflow.com/a/27254437
    #             [2] How to pass an array as function argument? https://askubuntu.com/a/674347
    local string_test element

    if [[ $# -lt 1 ]]; then return 1; fi;
    string_test="$1";
    shift; # remaining positional parameters are the array elements; see [2]

    for element in "$@"; do
        # Exact comparison: a prefix (or empty) test string must not match
        if [[ "$element" == "$string_test" ]]; then return 0; fi;
    done;
    return 1;
} # Check if string is element in array
main() {
    # Desc: Main program
    # Input: arg1: path to source tree
    #        arg2: path to destination tree
    #        arg3: cumulative duration (seconds) of audio files in destination tree
    #        arg4: cumulative size (bytes) of audio files in destination tree (optional)
    #        assoc arrays: appRollCall, fileRollCall, dirRollCall
    #        env.var: BKSHUF_PARAM_LINEC (bkshuf)
    #                 BKSHUF_PARAM_GSIZE (bkshuf)
    #        arrays: music_codecs
    #        vars: max_filename_length, min_file_duration, max_file_duration,
    #              min_file_size, max_file_size, siz_dest, max_find_depth,
    #              ext_ignore
    # Output: files copied into arg2; arg2/COPY.log appended; status on stderr
    # Depends: yell(), die(), must(), checkdir() 0.1.2, checkapp(),
    #          displayMissing() 1.0.0, showUsage(), GNU Coreutils 8.30
    #          BK-2020-03: bkshuf v0.1.0
    local arg1 arg2 arg3 arg4 dur_dest dir_source dir_dest
    local line dur siz n file_count copy_count
    local path_candfile siz_cand dur_cand file_format
    local dur_cand_w siz_cand_w
    local num_w num_fmt num path_log_output log_fmt
    local rest fdur fdur_pad fsize fpath fpath_can
    local file_basename file_basename_compat fingerprint file_name path_output
    local -a list_files=() # array for files to be considered
    local -a list_copy=() # array for files to be copied (string: "$dur,$fsize,$path")

    # Parse args
    arg1="$1";
    arg2="$2";
    arg3="$3";
    arg4="$4";
    if ! { [[ $# -eq 3 ]] || [[ $# -eq 4 ]]; }; then
        showUsage;
        die "ERROR:Invalid number of args:$#";
    fi;

    # Check env vars
    if ! checkInt "$BKSHUF_PARAM_LINEC"; then
        die "FATAL:Not an int:BKSHUF_PARAM_LINEC:$BKSHUF_PARAM_LINEC";
    fi;
    if ! checkInt "$BKSHUF_PARAM_GSIZE"; then
        die "FATAL:Not an int:BKSHUF_PARAM_GSIZE:$BKSHUF_PARAM_GSIZE";
    fi;

    ## Check duration
    if checkInt "$arg3"; then
        dur_dest="$arg3";
    else
        die "FATAL:Duration (seconds) not an int:$arg3"
    fi;

    ## Check size
    if [[ -n "$arg4" ]]; then
        if checkInt "$arg4"; then
            siz_dest="$arg4";
        else
            die "FATAL:Size (bytes) not an int:$arg4";
        fi;
    fi;

    ## Check directories
    ## (a failure is recorded in dirRollCall and stops the script in
    ## displayMissing below, after missing apps were collected as well)
    if checkdir "$arg1" "$arg2"; then
        dir_source="$arg1";
        dir_dest="$arg2";
    else
        yell "ERROR:Directory error";
    fi;

    ## Check apps
    checkapp ffprobe bkshuf;
    if ! displayMissing; then
        showUsage;
        die "ERROR:Check missing resources.";
    fi;

    yell "STATUS:Working...";

    # Populate list_files array
    # (mapfile keeps leading/trailing whitespace of paths intact;
    #  grep stays case-sensitive because ext_ignore contains an unanchored
    #  alternative that must not match ordinary mixed-case file names)
    mapfile -t list_files < <(find -L "$dir_source" -maxdepth "$max_find_depth" -type f | \
                                  grep -Ev "$ext_ignore" | \
                                  sort);

    ## Get element count of list_files array
    file_count="${#list_files[@]}";
    if [[ $file_count -eq 0 ]]; then
        yell "NOTICE:No candidate files found:$dir_source";
        return 0;
    fi;

    # Test and add random elements of list_files to list_copy
    dur=0; # Initialize duration
    siz=0; # Initialize size
    n=0; # Initialize loop counter
    dur_cand_w=1; # Init duration digit width counter
    siz_cand_w=1; # Init size digit width counter
    while IFS= read -r line && \
            [[ $dur -le $((dur_dest * 95 / 100)) ]] && \
            [[ $siz -le $((siz_dest * 95 / 100)) ]] && \
            [[ $n -le $file_count ]]; do
        n="$((n + 1))";
        yell "DEBUG:list_copy building loop:$n/$file_count"; # debug
        printf "DEBUG:%8d/%8d,%8d/%8d\n" "$dur" "$dur_dest" "$siz" "$siz_dest" 1>&2; # debug: totals vs. limits
        path_candfile="$line"; # path of candidate file

        ### Check size
        ### (-L: measure the target of a symlink, since 'find -L' also lists
        ### symlinks that point to regular files)
        siz_cand="$(du -bL -- "$path_candfile" | awk '{ print $1 }')"; # size in bytes
        if ! checkInt "$siz_cand"; then continue; fi; # reject
        if [[ "$((siz + siz_cand))" -gt "$siz_dest" ]]; then continue; fi; # reject
        if [[ "$siz_cand" -lt "$min_file_size" ]]; then continue; fi; # reject
        if [[ "$siz_cand" -gt "$max_file_size" ]]; then continue; fi; # reject

        ### Check if has valid codec
        if ! check_parsable_audio_ffprobe "$path_candfile"; then continue; fi; # reject

        ### Check if desired codec
        file_format="$(get_audio_format "$path_candfile")";
        if ! checkIsInArray "$file_format" "${music_codecs[@]}"; then continue; fi; # reject

        ### Check duration
        dur_cand="$(get_media_length "$path_candfile")";
        dur_cand="${dur_cand%%.*}"; # convert float to int
        if ! checkInt "$dur_cand"; then continue; fi; # reject
        if [[ "$((dur + dur_cand))" -gt "$dur_dest" ]]; then continue; fi; # reject
        if [[ "$dur_cand" -lt "$min_file_duration" ]]; then continue; fi; # reject
        if [[ "$dur_cand" -gt "$max_file_duration" ]]; then continue; fi; # reject

        ### Update stats digits widths (used to align log columns later)
        if [[ ${#dur_cand} -gt $dur_cand_w ]]; then dur_cand_w="${#dur_cand}"; fi;
        if [[ ${#siz_cand} -gt $siz_cand_w ]]; then siz_cand_w="${#siz_cand}"; fi;

        ### Add/update candfile to array:
        ###   list_copy (array with "duration, size, path")
        printf "DEBUG:%8d,%8d,%s\n" "$dur_cand" "$siz_cand" "$path_candfile" 1>&2;
        list_copy+=("$dur_cand,$siz_cand,$path_candfile"); # for copying with order

        ### Update total duration $dur and total size $siz
        dur="$((dur + dur_cand))";
        siz="$((siz + siz_cand))";
        yell "DEBUG:dur:$dur";
        yell "DEBUG:siz:$siz";
    done < <(printf "%s\n" "${list_files[@]}" | bkshuf);

    # Nothing selected: do not touch the destination at all
    copy_count="${#list_copy[@]}";
    if [[ $copy_count -eq 0 ]]; then
        yell "NOTICE:No files selected; nothing copied.";
        return 0;
    fi;

    n=0; # Initialize loop counter
    num_w="${#copy_count}"; # init file number format (digit count)
    num_fmt="%0${num_w}d";
    path_log_output="$dir_dest"/COPY.log;
    printf "num,fingerprint,duration,size,original_path\n" >> "$path_log_output";

    # Copy files in list_copy to dir_dest;
    for line in "${list_copy[@]}"; do
        ## Split "duration,size,path" (path may itself contain commas)
        fdur="${line%%,*}";
        rest="${line#*,}";
        fsize="${rest%%,*}";
        fpath="${rest#*,}";

        ## Get basename of path
        file_basename="$(basename -- "$fpath")";
        ### Get basename without unprintable non-ASCII characters
        file_basename_compat="$(printf "%s" "$file_basename" | tr -dc '[:graph:][:space:]' )";

        ## Get 8-hex-character (32-bit) b2sum fingerprint (for different files that share basename)
        ## (read via stdin so that b2sum never escapes an unusual file name
        ## and prefixes the digest with a backslash)
        fingerprint="$(b2sum -l32 < "$fpath" | awk '{print $1}' )";

        ## Form output filename
        # shellcheck disable=SC2059 # format string is built from a digit count only
        printf -v num "$num_fmt" "$n";
        file_name="$num"_"$fingerprint".."$file_basename_compat";
        file_name="${file_name:0:$max_filename_length}"; # Limit filename length (e.g. Windows has max of 255 characters)

        ## Form output path
        path_output="$dir_dest"/"$file_name";

        ## Copy (must() terminates the script if cp fails)
        must cp -- "$fpath" "$path_output";
        printf -v fdur_pad "%${dur_cand_w}d" "$fdur";
        yell "NOTICE:Copied ($fdur_pad seconds): $fpath ";

        ## Append log
        fpath_can="$(readlink -f -- "$fpath")"; # resolve symlinks to canonical path
        log_fmt="%s,%s,%${dur_cand_w}d,%${siz_cand_w}d,%s\n"; # e.g. "%s,%s,%3d,%5d,%s" if dur_cand_w=3 and siz_cand_w=5
        # shellcheck disable=SC2059 # format string is built from digit counts only
        printf "$log_fmt" "$num" "$fingerprint" "$fdur" "$fsize" "$fpath_can" >> "$path_log_output";

        n="$((n + 1))";
    done;

    # Report total duration and size
    yell "NOTICE:Total duration (seconds):$dur";
    yell "NOTICE:Total size (bytes):$siz";
} # Main program

main "$@";

# Author: Steven Baltakatei Sandoval
# License: GPLv3+

# bkshuf v0.1.0
#   Author: Steven Baltakatei Sandoval
#   License: GPLv3+
#   URL: https://gitlab.com/baltakatei/baltakatei-exdev/-/blob/b9e8b771e985fcdf26ba8b9ccb8e31b62da757d3/unitproc/bkshuf