# Desc: Copies random audio files
# Usage: bk-copy-rand-music [dir SOURCE] [dir DEST] [int DURATION] ([int BYTES])
# Depends: BK-2020-03: bkshuf v0.1.0
   7 declare -Ag appRollCall 
# Associative array for storing app status 
   8 declare -Ag fileRollCall 
# Associative array for storing file status 
   9 declare -Ag dirRollCall 
# Associative array for storing dir status 
  10 declare -a music_codecs 
# Array for storing valid codec names (e.g. "aac" "mp3") 
  12 # Adjustable parameters 
  13 music_codecs
=("vorbis" "aac" "mp3" "flac" "opus" "eac3"); # whitelist of valid codec_names ffprobe might return 
  14 ext_ignore
=".ots\$|.mid\$|.json\$|.gz\$|.jpg\$|.png\$|.asc\$|.pdf\$|.txt\$|.vtt\$|\.SUM|.zip\$|.xz\$|.org\$|.txt\$"; # blacklist of file extensions for 'grep -Evi' 
  15 max_filename_length
="255"; # max output filename length 
  16 min_file_duration
="30"; # minimum duration per music file 
  17 max_file_duration
="3600"; # maximum duration per music file 
  18 min_file_size
="100000"; # minimum size per music file (bytes) 
  19 max_file_size
="100000000"; # maximum size per music file (bytes) 
  20 siz_dest
="600000000"; # default destination size limit: 600 MB 
  21 max_find_depth
="10"; # max find depth 
  23 # Load env vars (bkshuf defaults for typical music albums) 
  24 if [[ ! -v BKSHUF_PARAM_LINEC 
]]; then export BKSHUF_PARAM_LINEC
=1000000; fi; 
  25 if [[ ! -v BKSHUF_PARAM_GSIZE 
]]; then export BKSHUF_PARAM_GSIZE
=10; fi; 
  27 yell
() { echo "$0: $*" >&2; } # print script path and all args to stderr 
  28 die
() { yell 
"$*"; exit 111; } # same as yell() but non-zero exit status 
  29 must
() { "$@" || die 
"cannot $*"; } # runs args as command, reports args if command fails 
  31     # Desc: If arg is a command, save result in assoc array 'appRollCall' 
  32     # Usage: checkapp arg1 arg2 arg3 ... 
  34     # Input: global assoc. array 'appRollCall' 
  35     # Output: adds/updates key(value) to global assoc array 'appRollCall' 
  41         if command -v "$arg" 1>/dev
/null 
2>&1; then # Check if arg is a valid command 
  42             appRollCall
[$arg]="true"; 
  43             if ! [ "$returnState" = "false" ]; then returnState
="true"; fi; 
  45             appRollCall
[$arg]="false"; returnState
="false"; 
  49     #===Determine function return code=== 
  50     if [ "$returnState" = "true" ]; then 
  55 } # Check that app exists 
  57     # Desc: If arg is a file path, save result in assoc array 'fileRollCall' 
  58     # Usage: checkfile arg1 arg2 arg3 ... 
  60     # Input: global assoc. array 'fileRollCall' 
  61     # Output: adds/updates key(value) to global assoc array 'fileRollCall'; 
    # Output: returns 0 if all args are files; 1 otherwise
  68         if [ -f "$arg" ]; then 
  69             fileRollCall
["$arg"]="true"; 
  70             if ! [ "$returnState" = "false" ]; then returnState
="true"; fi; 
  72             fileRollCall
["$arg"]="false"; returnState
="false"; 
  76     #===Determine function return code=== 
  77     if [ "$returnState" = "true" ]; then 
  82 } # Check that file exists 
  84     # Desc: If arg is a dir path, save result in assoc array 'dirRollCall' 
  85     # Usage: checkdir arg1 arg2 arg3 ... 
  87     # Input: global assoc. array 'dirRollCall' 
  88     # Output: adds/updates key(value) to global assoc array 'dirRollCall'; 
  89     # Output: returns 0 if all args are dirs; 1 otherwise 
  95         if [ -z "$arg" ]; then 
  96             dirRollCall
["(Unspecified Dirname(s))"]="false"; returnState
="false"; 
  97         elif [ -d "$arg" ]; then 
  98             dirRollCall
["$arg"]="true"; 
  99             if ! [ "$returnState" = "false" ]; then returnState
="true"; fi 
 101             dirRollCall
["$arg"]="false"; returnState
="false"; 
 105     #===Determine function return code=== 
 106     if [ "$returnState" = "true" ]; then 
 111 } # Check that dir exists 
 113     # Desc: Displays missing apps, files, and dirs 
 114     # Usage: displayMissing 
 116     # Input: associative arrays: appRollCall, fileRollCall, dirRollCall 
 117     # Output: stderr: messages indicating missing apps, file, or dirs 
 118     # Output: returns exit code 0 if nothing missing; 1 otherwise 
 119     # Depends: bash 5, checkAppFileDir() 
 120     local missingApps value appMissing missingFiles fileMissing
 
 121     local missingDirs dirMissing
 
 123     #==BEGIN Display errors== 
 124     #===BEGIN Display Missing Apps=== 
 125     missingApps
="Missing apps  :"; 
 126     #for key in "${!appRollCall[@]}"; do echo "DEBUG:$key => ${appRollCall[$key]}"; done 
 127     for key 
in "${!appRollCall[@]}"; do 
 128         value
="${appRollCall[$key]}"; 
 129         if [ "$value" = "false" ]; then 
 130             #echo "DEBUG:Missing apps: $key => $value"; 
 131             missingApps
="$missingApps""$key "; 
 135     if [ "$appMissing" = "true" ]; then  # Only indicate if an app is missing. 
 136         echo "$missingApps" 1>&2; 
 139     #===END Display Missing Apps=== 
 141     #===BEGIN Display Missing Files=== 
 142     missingFiles
="Missing files:"; 
 143     #for key in "${!fileRollCall[@]}"; do echo "DEBUG:$key => ${fileRollCall[$key]}"; done 
 144     for key 
in "${!fileRollCall[@]}"; do 
 145         value
="${fileRollCall[$key]}"; 
 146         if [ "$value" = "false" ]; then 
 147             #echo "DEBUG:Missing files: $key => $value"; 
 148             missingFiles
="$missingFiles""$key "; 
 152     if [ "$fileMissing" = "true" ]; then  # Only indicate if an app is missing. 
 153         echo "$missingFiles" 1>&2; 
 156     #===END Display Missing Files=== 
 158     #===BEGIN Display Missing Directories=== 
 159     missingDirs
="Missing dirs:"; 
 160     #for key in "${!dirRollCall[@]}"; do echo "DEBUG:$key => ${dirRollCall[$key]}"; done 
 161     for key 
in "${!dirRollCall[@]}"; do 
 162         value
="${dirRollCall[$key]}"; 
 163         if [ "$value" = "false" ]; then 
 164             #echo "DEBUG:Missing dirs: $key => $value"; 
 165             missingDirs
="$missingDirs""$key "; 
 169     if [ "$dirMissing" = "true" ]; then  # Only indicate if an dir is missing. 
 170         echo "$missingDirs" 1>&2; 
 173     #===END Display Missing Directories=== 
 175     #==END Display errors== 
 176     #==BEGIN Determine function return code=== 
 177     if [ "$appMissing" == "true" ] || 
[ "$fileMissing" == "true" ] || 
[ "$dirMissing" == "true" ]; then 
 182     #==END Determine function return code=== 
 183 } # Display missing apps, files, dirs 
 185     # Desc: Display script usage information 
 190     # Depends: GNU-coreutils 8.30 (cat) 
 194       This script may be used to copy a random selection of files containing 
 195       audio tracks from SOURCE to DEST. 
 198       bk-copy-rand-music [dir SOURCE] [dir DEST] [int DURATION] (int BYTES) 
 201       bk-copy-rand-music ~/Music /tmp/music-sample 3600 
 202       bk-copy-rand-music ~/Music /tmp/music-sample 3600 680000000 
 208     ENVIRONMENT VARIABLES 
 209       BKSHUF_PARAM_LINEC (see `bkshuf` in BK-2020-03) 
 210       BKSHUF_PARAM_GSIZE (see `bkshuf` in BK-2020-03) 
 212 } # Display information on how to use this script. 
 213 check_parsable_audio_ffprobe
() { 
 214     # Desc: Checks if ffprobe returns valid audio codec name for file 
 215     # Usage: check_parsable_audio_ffprobe [path FILE] 
 217     # Input: arg1: file path 
 218     # Output: exit code 0 if returns valid codec name; 1 otherwise 
 219     # Depends: ffprobe, die() 
 220     local file_in ffprobe_out
 
 222     if [[ $# -ne 1 ]]; then die 
"ERROR:Invalid number of args:$#"; fi; 
 226     # Check if ffprobe detects an audio stream 
 227     if ffprobe 
-v error 
-select_streams a 
-show_entries stream
=codec_name 
-of default
=nokey
=1:noprint_wrappers
=1 "$file_in" 1>/dev
/null 
2>&1; then 
 230         return_state
="false"; 
 233     # Fail if ffprobe returns no result 
 234     ffprobe_out
="$(ffprobe -v error -select_streams a -show_entries stream=codec_name -of default=nokey=1:noprint_wrappers=1 "$file_in")"; 
 235     if [[ -z $ffprobe_out ]]; then 
 236         return_state
="false"; 
 240     if [[ $return_state = "true" ]]; then 
 245 } # Checks if file has valid codec name using ffprobe 
 247     # Desc: Gets audio format of file as string 
 248     # Usage: get_audio_format arg1 
 251     # Input: arg1: input file path 
 252     # Output: stdout (if valid audio format) 
 253     #         exit code 0 if audio file; 1 otherwise 
 254     # Example: get_audio_format myvideo.mp4 
 255     #   Note: Would return "opus" if full ffprobe report had 'Audio: opus, 48000 Hz, stereo, fltp' 
 256     # Note: Not tested with videos containing multiple video streams 
 257     # Ref/Attrib: [1] https://stackoverflow.com/questions/5618363/is-there-a-way-to-use-ffmpeg-to-determine-the-encoding-of-a-file-before-transcod 
 258     #             [2] https://stackoverflow.com/questions/44123532/how-to-find-out-the-file-extension-for-extracting-audio-tracks-with-ffmpeg-and-p#comment88464070_50723126 
 259     local audio_format file_in
; 
 263     # Return error exit code if not audio file 
 264     ## Return error if ffprobe itself exited on error 
 265     if ! ffprobe 
-v error 
-select_streams a 
-show_entries stream
=codec_name 
-of default
=nokey
=1:noprint_wrappers
=1 "$file_in" 1>/dev
/null 
2>&1; then 
 266         return_state
="false"; 
 270     audio_format
="$(ffprobe -v error -select_streams a -show_entries stream=codec_name -of default=nokey=1:noprint_wrappers=1 "$file_in")"; # see [1] 
 272     ## Return error if audio format is incorrectly formatted (e.g. reject if contains spaces) 
 273     pattern
="^[[:alnum:]]+$"; # alphanumeric string with no spaces 
 274     if [[ $audio_format =~ 
$pattern ]]; then 
 276         # Report audio format 
 277         echo "$audio_format"; 
 279         return_state
="false"; 
 283     if [[ $return_state = "true" ]]; then 
 288 } # Get audio format as stdout 
 290     # Use ffprobe to get media container length in seconds (float) 
 291     # Usage: get_media_length arg1 
 292     # Input:  arg1: path to file 
 293     # Output: stdout: seconds (float) 
 294     # Depends: ffprobe 4.1.8 
 295     # Ref/Attrib: [1] How to get video duration in seconds? https://superuser.com/a/945604 
 298     if [[ ! -f $file_in ]]; then 
 299         die 
"ERROR:Not a file:$file_in"; 
 301     ffprobe 
-v error 
-show_entries format
=duration 
-of default
=noprint_wrappers
=1:nokey
=1 "$file_in"; 
 302 } # Get media container length in seconds via stdout 
 304     # Desc: Checks if arg is integer 
 305     # Usage: checkInt arg 
 306     # Input: arg: integer 
 307     # Output: - return code 0 (if arg is integer) 
 308     #         - return code 1 (if arg is not integer) 
 309     # Example: if ! checkInt $arg; then echo "not int"; fi; 
 314     if [[ $# -ne 1 ]]; then 
 315         die 
"ERROR:Invalid number of arguments:$#"; 
 318     RETEST1
='^[0-9]+$'; # Regular Expression to test 
 319     if [[ ! $1 =~ 
$RETEST1 ]] ; then 
 325     #===Determine function return code=== 
 326     if [ "$returnState" = "true" ]; then 
 331 } # Checks if arg is integer 
 333     # Desc: Checks if input arg is element in array 
 334     # Usage: checkIsInArray arg1 arg2 
 336     # Input: arg1: test string 
 338     # Output: exit code 0 if test string is in array; 1 otherwise 
 339     # Example: checkIsInArray "foo" "${myArray[@]}" 
 340     # Ref/Attrib: [1] How do I check if variable is an array? https://stackoverflow.com/a/27254437 
 341     #             [2] How to pass an array as function argument? https://askubuntu.com/a/674347 
 342     local return_state input arg1 string_test
 
 343     declare -a arg2 array_test
 
 344     input
=("$@") # See [2] 
 346     arg2
=("${input[@]:1}"); 
 347     #yell "DEBUG:input:${input[@]}"; 
 348     #yell "DEBUG:arg1:${arg1[@]}"; 
 349     #yell "DEBUG:arg2:${arg2[@]}"; 
 352     array_test
=("${arg2[@]}"); 
 354     #yell "DEBUG:string_test:$string_test"; 
 355     #yell "DEBUG:$(declare -p array_test)"; 
 356     for element 
in "${array_test[@]}"; do 
 357         #yell "DEBUG:element:$element"; 
 358         if [[ "$element" =~ ^
"$string_test" ]]; then 
 365     if [[ $return_state == "true" ]]; then 
 370 } # Check if string is element in array 
 373     # Input: arg1: path to source tree 
 374     #        arg2: path to destination tree 
 375     #        arg3: cumulative duration (seconds) of audio files in destination tree 
 376     #        arg4: cumulative size (bytes) of audio files in destination tree (optional) 
 377     #        assoc arrays: appRollCall, fileRollCall, dirRollCall 
 378     #        env.var: BKSHUF_PARAM_LINEC (bkshuf) 
 379     #                 BKSHUF_PARAM_GSIZE (bkshuf) 
 380     #        arrays: music_codecs 
 381     #        vars: max_filename_length, min_file_duration, max_file_duration, 
 382     #                min_file_size, max_file_size, siz_dest, max_find_depth 
 384     # Depends: yell(), checkdir() 0.1.2, displayMissing() 1.0.0, GNU Coreutils 8.30 
 385     #          BK-2020-03: bkshuf v0.1.0 
 386     local arg1 arg2 arg3 dur_dest dir_source dir_dest
 
 387     declare -a list_files 
# array for files to be considered 
 388     declare -a list_copy 
# array for files to be copied (string: "$dur,$fsize,$path") 
 395     if ! { [[ $# -eq 3 ]] || 
[[ $# -eq 4 ]]; }; then 
 397         die 
"ERROR:Invalid number of args:$#"; fi; 
 400     if ! checkInt 
"$BKSHUF_PARAM_LINEC"; then 
 401         die 
"FATAL:Not an int:BKSHUF_PARAM_LINEC:$BKSHUF_PARAM_LINEC"; fi; 
 402     if ! checkInt 
"$BKSHUF_PARAM_GSIZE"; then 
 403         die 
"FATAL:Not an int:BKSHUF_PARAM_LINEC:$BKSHUF_PARAM_GSIZE"; fi; 
 406     if checkInt 
"$arg3"; then 
 409         die 
"FATAL:Duration (seconds) not an int:$arg3" 
 413     if [[ -n "$arg4" ]]; then 
 414         if checkInt 
"$arg4"; then 
 417             die 
"FATAL:Size (bytes) not an int:$arg4"; 
 422     if checkdir 
"$arg1" "$arg2"; then 
 426         yell 
"ERROR:Directory error"; 
 430     checkapp ffprobe bkshuf
; 
 432     if ! displayMissing
; then 
 434         die 
"ERROR:Check missing resources."; 
 437     yell 
"STATUS:Working..."; 
 439     # Populate list_files array 
 440     while read -r line
; do 
 441         list_files
+=("$line"); 
 442     done < <(find -L "$dir_source" -maxdepth "$max_find_depth" -type f | \
 
 443                  grep -Ev "$ext_ignore" | \
 
 446     # Test and add random elements of list_files to list_copy 
 447     dur
=0; # Initialize duration 
 448     siz
=0; # Initialize size 
 449     n
=0; # Initialize loop counter 
 450     dur_cand_w
=1; # Init duration digit width counter 
 451     siz_cand_w
=1; # Init size digit width counter 
 452     ## Get element count of list_files array 
 453     file_count
="${#list_files[@]}"; 
 454     while read -r line 
&& \
 
 455             [[ $dur -le $
((dur_dest 
* 95 / 100)) ]] && \
 
 456             [[ $siz -le $
((siz_dest 
* 95 / 100)) ]] && \
 
 457             [[ $n -le $file_count ]]; do 
 460         yell 
"DEBUG:list_copy building loop:$n/$file_count"; # debug 
 461         printf "DEBUG:%8d,%8d,%8d/%8d,%8d/%8d\n" "$dur_cand" "$siz_cand" "$dur" "$dur_dest" "$siz" "$siz_dest"; # debug 
 463         path_candfile
="$line"; # path of candidate file 
 466         siz_cand
="$(du -Lb "$path_candfile" | awk '{ print $1 }')"; # size in bytes 
 467         if ! checkInt 
"$siz_cand"; then continue; fi; # reject 
 468         if [[ "$((siz + siz_cand))" -gt "$siz_dest" ]]; then continue; fi; # reject 
 469         if [[ "$siz_cand" -lt "$min_file_size" ]]; then continue; fi; # reject 
 470         if [[ "$siz_cand" -gt "$max_file_size" ]]; then continue; fi; # reject 
 472         ### Check if has valid codec 
 473         if ! check_parsable_audio_ffprobe 
"$path_candfile"; then continue; fi; # reject 
 475         ### Check if desired codec 
 476         file_format
="$(get_audio_format "$path_candfile")"; 
 477         if ! checkIsInArray 
"$file_format" "${music_codecs[@]}"; then continue; fi; # reject 
 480         dur_cand
="$(get_media_length "$path_candfile")"; 
 481         dur_cand
="${dur_cand%%.*}"; # convert float to int 
 482         if ! checkInt 
"$dur_cand"; then continue; fi; # reject 
 483         if [[ "$((dur + dur_cand))" -gt "$dur_dest" ]]; then continue; fi; # reject 
 484         if [[ "$dur_cand" -lt "$min_file_duration" ]]; then continue; fi; # reject 
 485         if [[ "$dur_cand" -gt "$max_file_duration" ]]; then continue; fi; # reject 
 487         ### Update stats digits widths 
 489         dur_cand_wnow
="$(printf "%s
" "$dur_cand" | wc -m)"; # duration width count 
 490         if [[ $dur_cand_wnow -gt $dur_cand_w ]]; then 
 491             dur_cand_w
="$dur_cand_wnow"; fi; 
 493         siz_cand_wnow
="$(printf "%s
" "$siz_cand" | wc -m)"; # size  width count 
 494         if [[ $siz_cand_wnow -gt $siz_cand_w ]]; then 
 495             siz_cand_w
="$siz_cand_wnow"; fi; 
 497         ### Add/update candfile to array: 
 498         ###   list_copy (array with "duration, size, path") 
 499         #yell "DEBUG:Adding $path_candfile"; 
 500         printf "DEBUG:%8d,%8d,%s\n" "$dur_cand" "$siz_cand" "$path_candfile" 1>&2; 
 501         #printf "DEBUG:dur:%s\n" "$dur" 1>&2; 
 502         #printf "DEBUG:siz:%s\n" "$siz" 1>&2; 
 503         list_copy
+=("$dur_cand,$siz_cand,$path_candfile"); # for copying with order 
 505         ### Update total duration $dur and total size $siz 
 506         dur
="$((dur + dur_cand))"; 
 507         siz
="$((siz + siz_cand))"; 
 508         yell 
"DEBUG:dur:$dur"; 
 509         yell 
"DEBUG:siz:$siz"; 
 510     done < <(printf "%s\n" "${list_files[@]}" | bkshuf
); 
 512     #yell "DEBUG:BKSHUF_PARAM_LINEC:$BKSHUF_PARAM_LINEC"; 
 513     #yell "DEBUG:BKSHUF_PARAM_GSIZE:$BKSHUF_PARAM_GSIZE"; 
 515     n
=0; # Initialize loop counter 
 516     num_w
="$(printf "%s
" "${#list_copy[@]}" | wc -m)"; # init file number format 
 517     num_fmt
="%0""$num_w""d"; 
 518     path_log_output
="$dir_dest"/COPY.log
; 
 519     printf "num,fingerprint,duration,size,original_path\n" >> "$path_log_output"; 
 520     # Copy files in list_copy to dir_dest; 
 521     while read -r line
; do 
 522         #yell "DEBUG:line:$line"; # debug 
 523         fdur
="$(printf "%s
" "$line" | cut -d',' -f1)"; 
 524         fsize
="$(printf "%s
" "$line" | cut -d',' -f2)"; 
 525         fpath
="$(printf "%s
" "$line" | cut -d',' -f3-)"; 
 526         ## Get basename of path 
 527         file_basename
="$(basename "$fpath")"; 
 528         ### Get basename without unprintable non-ASCII characters 
 529         file_basename_compat
="$(printf "%s
" "$file_basename" | tr -dc '[:graph:][:space:]' )"; 
        ## Get 8-hex-character (32-bit) b2sum fingerprint (for different files that share basename)
 532         fingerprint
="$(b2sum -l32 "$fpath" | awk '{print $1}' )"; 
 534         ## Form output filename 
 535         num
="$(printf "$num_fmt" "$n")"; 
 536         file_name
="$num"_
"$fingerprint"..
"$file_basename_compat"; 
 537         file_name
="${file_name:0:$max_filename_length}"; # Limit filename length (e.g. Windows has max of 255 characters) 
 540         path_output
="$dir_dest"/"$file_name"; 
 543         must 
cp "$fpath" "$path_output" && yell 
"NOTICE:Copied ($(printf "%""$dur_cand_w"d "$fdur") seconds): $fpath "; 
 544         #yell "DEBUG:Copied $file_basename to $dur_dest."; 
 547         fpath_can
="$(readlink -f "$fpath")"; # resolve symlinks to canonical path 
 548         log_fmt
="%s,%s,%""$dur_cand_w""d,%""$siz_cand_w""d,%s\n"; # e.g. "%s,%3d,%5d,%s" if dur_cand_w=3 and siz_cand_w=5 
 549         printf "$log_fmt" "$num" "$fingerprint" "$fdur" "$fsize" "$fpath_can" >> "$path_log_output"; 
 552         unset file_basename file_basename_compat path_output
; 
 553     done < <(printf "%s\n" "${list_copy[@]}"); 
 555     # Report total duration and size 
 556     yell 
"NOTICE:Total duration (seconds):$dur"; 
 557     yell 
"NOTICE:Total size (bytes):$siz"; 
 563 # Author: Steven Baltakatei Sandoval 
 567 #   Author: Steven Baltakatei Sandoval 
 569 #   URL: https://gitlab.com/baltakatei/baltakatei-exdev/-/blob/b9e8b771e985fcdf26ba8b9ccb8e31b62da757d3/unitproc/bkshuf