# Desc: Copies random audio files
# Usage: bk-copy-rand-music [dir SOURCE] [dir DEST] [int DURATION] ([int BYTES])
# Depends: BK-2020-03: bkshuf v0.1.0
# Global containers shared by the check*/displayMissing helpers and main.
# The *RollCall tables map a name/path to the string "true" (found) or
# "false" (missing).
declare -Ag appRollCall;  # Associative array for storing app status
declare -Ag fileRollCall; # Associative array for storing file status
declare -Ag dirRollCall;  # Associative array for storing dir status
declare -a music_codecs;  # Array for storing valid codec names (e.g. "aac" "mp3")
# Adjustable parameters
# Whitelist of valid codec_names ffprobe might return.
music_codecs=("vorbis" "aac" "mp3" "flac" "opus");
# Blacklist of file extensions, written as an extended regex that is handed
# to 'grep -Ev' when the candidate file list is built. Dots are escaped so
# that only real extensions match (an unescaped '.ots$' would also drop a
# file named 'robots'). '\.SUM' is deliberately left unanchored.
ext_ignore='\.ots$|\.mid$|\.json$|\.gz$|\.jpg$|\.png$|\.asc$|\.pdf$|\.txt$|\.vtt$|\.SUM|\.zip$|\.xz$|\.org$';
max_filename_length="255";  # max output filename length
min_file_duration="30";     # minimum duration per music file
max_file_duration="3600";   # maximum duration per music file
min_file_size="100000";     # minimum size per music file (bytes)
max_file_size="100000000";  # maximum size per music file (bytes)
siz_dest="600000000";       # default destination size limit: 600 MB
max_find_depth="10";        # max find depth
# Load env vars (bkshuf defaults for typical music albums).
# A default is exported only when the variable is not set at all, so a
# value supplied by the caller's environment is left untouched.
if [[ ! -v BKSHUF_PARAM_LINEC ]]; then export BKSHUF_PARAM_LINEC=1000000; fi;
if [[ ! -v BKSHUF_PARAM_GSIZE ]]; then export BKSHUF_PARAM_GSIZE=10; fi;
# Desc: Print the script path ($0) followed by all args to stderr
# Usage: yell arg1 arg2 ...
# Output: stderr: "<script path>: <args joined by spaces>"
yell() {
    printf '%s: %s\n' "$0" "$*" >&2;
} # print script path and all args to stderr
# Desc: Report all args on stderr via yell(), then terminate the shell
# Usage: die arg1 arg2 ...
# Output: stderr: same message yell() prints; exit status 111
# Depends: yell()
die() {
    yell "$*";
    exit 111;
} # same as yell() but non-zero exit status
# Desc: Run the args as a command; if it fails, die() with "cannot <args>"
# Usage: must cmd arg1 arg2 ...
# Output: whatever the command outputs; on failure the shell exits via die()
# Depends: die()
must() {
    "$@" || die "cannot $*";
} # runs args as command, reports args if command fails
31 # Desc: If arg is a command, save result in assoc array 'appRollCall'
32 # Usage: checkapp arg1 arg2 arg3 ...
34 # Input: global assoc. array 'appRollCall'
35 # Output: adds/updates key(value) to global assoc array 'appRollCall'
41 if command -v "$arg" 1>/dev
/null
2>&1; then # Check if arg is a valid command
42 appRollCall
[$arg]="true";
43 if ! [ "$returnState" = "false" ]; then returnState
="true"; fi;
45 appRollCall
[$arg]="false"; returnState
="false";
49 #===Determine function return code===
50 if [ "$returnState" = "true" ]; then
55 } # Check that app exists
57 # Desc: If arg is a file path, save result in assoc array 'fileRollCall'
58 # Usage: checkfile arg1 arg2 arg3 ...
60 # Input: global assoc. array 'fileRollCall'
61 # Output: adds/updates key(value) to global assoc array 'fileRollCall';
62 # Output: returns 0 if app found, 1 otherwise
68 if [ -f "$arg" ]; then
69 fileRollCall
["$arg"]="true";
70 if ! [ "$returnState" = "false" ]; then returnState
="true"; fi;
72 fileRollCall
["$arg"]="false"; returnState
="false";
76 #===Determine function return code===
77 if [ "$returnState" = "true" ]; then
82 } # Check that file exists
84 # Desc: If arg is a dir path, save result in assoc array 'dirRollCall'
85 # Usage: checkdir arg1 arg2 arg3 ...
87 # Input: global assoc. array 'dirRollCall'
88 # Output: adds/updates key(value) to global assoc array 'dirRollCall';
89 # Output: returns 0 if all args are dirs; 1 otherwise
95 if [ -z "$arg" ]; then
96 dirRollCall
["(Unspecified Dirname(s))"]="false"; returnState
="false";
97 elif [ -d "$arg" ]; then
98 dirRollCall
["$arg"]="true";
99 if ! [ "$returnState" = "false" ]; then returnState
="true"; fi
101 dirRollCall
["$arg"]="false"; returnState
="false";
105 #===Determine function return code===
106 if [ "$returnState" = "true" ]; then
111 } # Check that dir exists
113 # Desc: Displays missing apps, files, and dirs
114 # Usage: displayMissing
116 # Input: associative arrays: appRollCall, fileRollCall, dirRollCall
117 # Output: stderr: messages indicating missing apps, file, or dirs
118 # Output: returns exit code 0 if nothing missing; 1 otherwise
119 # Depends: bash 5, checkAppFileDir()
120 local missingApps value appMissing missingFiles fileMissing
121 local missingDirs dirMissing
123 #==BEGIN Display errors==
124 #===BEGIN Display Missing Apps===
125 missingApps
="Missing apps :";
126 #for key in "${!appRollCall[@]}"; do echo "DEBUG:$key => ${appRollCall[$key]}"; done
127 for key
in "${!appRollCall[@]}"; do
128 value
="${appRollCall[$key]}";
129 if [ "$value" = "false" ]; then
130 #echo "DEBUG:Missing apps: $key => $value";
131 missingApps
="$missingApps""$key ";
135 if [ "$appMissing" = "true" ]; then # Only indicate if an app is missing.
136 echo "$missingApps" 1>&2;
139 #===END Display Missing Apps===
141 #===BEGIN Display Missing Files===
142 missingFiles
="Missing files:";
143 #for key in "${!fileRollCall[@]}"; do echo "DEBUG:$key => ${fileRollCall[$key]}"; done
144 for key
in "${!fileRollCall[@]}"; do
145 value
="${fileRollCall[$key]}";
146 if [ "$value" = "false" ]; then
147 #echo "DEBUG:Missing files: $key => $value";
148 missingFiles
="$missingFiles""$key ";
152 if [ "$fileMissing" = "true" ]; then # Only indicate if an app is missing.
153 echo "$missingFiles" 1>&2;
156 #===END Display Missing Files===
158 #===BEGIN Display Missing Directories===
159 missingDirs
="Missing dirs:";
160 #for key in "${!dirRollCall[@]}"; do echo "DEBUG:$key => ${dirRollCall[$key]}"; done
161 for key
in "${!dirRollCall[@]}"; do
162 value
="${dirRollCall[$key]}";
163 if [ "$value" = "false" ]; then
164 #echo "DEBUG:Missing dirs: $key => $value";
165 missingDirs
="$missingDirs""$key ";
169 if [ "$dirMissing" = "true" ]; then # Only indicate if an dir is missing.
170 echo "$missingDirs" 1>&2;
173 #===END Display Missing Directories===
175 #==END Display errors==
176 #==BEGIN Determine function return code===
177 if [ "$appMissing" == "true" ] ||
[ "$fileMissing" == "true" ] ||
[ "$dirMissing" == "true" ]; then
182 #==END Determine function return code===
183 } # Display missing apps, files, dirs
185 # Desc: Display script usage information
190 # Depends: GNU-coreutils 8.30 (cat)
194 This script may be used to copy a random selection of files containing
195 audio tracks from SOURCE to DEST.
198 bk-copy-rand-music [dir SOURCE] [dir DEST] [int DURATION] (int BYTES)
201 bk-copy-rand-music ~/Music /tmp/music-sample 3600
202 bk-copy-rand-music ~/Music /tmp/music-sample 3600 680000000
208 ENVIRONMENT VARIABLES
209 BKSHUF_PARAM_LINEC (see `bkshuf` in BK-2020-03)
210 BKSHUF_PARAM_GSIZE (see `bkshuf` in BK-2020-03)
212 } # Display information on how to use this script.
213 check_parsable_audio_ffprobe
() {
214 # Desc: Checks if ffprobe returns valid audio codec name for file
215 # Usage: check_parsable_audio_ffprobe [path FILE]
217 # Input: arg1: file path
218 # Output: exit code 0 if returns valid codec name; 1 otherwise
219 # Depends: ffprobe, die()
220 local file_in ffprobe_out
222 if [[ $# -ne 1 ]]; then die
"ERROR:Invalid number of args:$#"; fi;
226 # Check if ffprobe detects an audio stream
227 if ffprobe
-v error
-select_streams a
-show_entries stream
=codec_name
-of default
=nokey
=1:noprint_wrappers
=1 "$file_in" 1>/dev
/null
2>&1; then
230 return_state
="false";
233 # Fail if ffprobe returns no result
234 ffprobe_out
="$(ffprobe -v error -select_streams a -show_entries stream=codec_name -of default=nokey=1:noprint_wrappers=1 "$file_in")";
235 if [[ -z $ffprobe_out ]]; then
236 return_state
="false";
240 if [[ $return_state = "true" ]]; then
245 } # Checks if file has valid codec name using ffprobe
247 # Desc: Gets audio format of file as string
248 # Usage: get_audio_format arg1
251 # Input: arg1: input file path
252 # Output: stdout (if valid audio format)
253 # exit code 0 if audio file; 1 otherwise
254 # Example: get_audio_format myvideo.mp4
255 # Note: Would return "opus" if full ffprobe report had 'Audio: opus, 48000 Hz, stereo, fltp'
256 # Note: Not tested with videos containing multiple video streams
257 # Ref/Attrib: [1] https://stackoverflow.com/questions/5618363/is-there-a-way-to-use-ffmpeg-to-determine-the-encoding-of-a-file-before-transcod
258 # [2] https://stackoverflow.com/questions/44123532/how-to-find-out-the-file-extension-for-extracting-audio-tracks-with-ffmpeg-and-p#comment88464070_50723126
259 local audio_format file_in
;
263 # Return error exit code if not audio file
264 ## Return error if ffprobe itself exited on error
265 if ! ffprobe
-v error
-select_streams a
-show_entries stream
=codec_name
-of default
=nokey
=1:noprint_wrappers
=1 "$file_in" 1>/dev
/null
2>&1; then
266 return_state
="false";
270 audio_format
="$(ffprobe -v error -select_streams a -show_entries stream=codec_name -of default=nokey=1:noprint_wrappers=1 "$file_in")"; # see [1]
272 ## Return error if audio format is incorrectly formatted (e.g. reject if contains spaces)
273 pattern
="^[[:alnum:]]+$"; # alphanumeric string with no spaces
274 if [[ $audio_format =~
$pattern ]]; then
276 # Report audio format
277 echo "$audio_format";
279 return_state
="false";
283 if [[ $return_state = "true" ]]; then
288 } # Get audio format as stdout
290 # Use ffprobe to get media container length in seconds (float)
291 # Usage: get_media_length arg1
292 # Input: arg1: path to file
293 # Output: stdout: seconds (float)
294 # Depends: ffprobe 4.1.8
295 # Ref/Attrib: [1] How to get video duration in seconds? https://superuser.com/a/945604
298 if [[ ! -f $file_in ]]; then
299 die
"ERROR:Not a file:$file_in";
301 ffprobe
-v error
-show_entries format
=duration
-of default
=noprint_wrappers
=1:nokey
=1 "$file_in";
302 } # Get media container length in seconds via stdout
304 # Desc: Checks if arg is integer
305 # Usage: checkInt arg
306 # Input: arg: integer
307 # Output: - return code 0 (if arg is integer)
308 # - return code 1 (if arg is not integer)
309 # Example: if ! checkInt $arg; then echo "not int"; fi;
314 if [[ $# -ne 1 ]]; then
315 die
"ERROR:Invalid number of arguments:$#";
318 RETEST1
='^[0-9]+$'; # Regular Expression to test
319 if [[ ! $1 =~
$RETEST1 ]] ; then
325 #===Determine function return code===
326 if [ "$returnState" = "true" ]; then
331 } # Checks if arg is integer
333 # Desc: Checks if input arg is element in array
334 # Usage: checkIsInArray arg1 arg2
336 # Input: arg1: test string
338 # Output: exit code 0 if test string is in array; 1 otherwise
339 # Example: checkIsInArray "foo" "${myArray[@]}"
340 # Ref/Attrib: [1] How do I check if variable is an array? https://stackoverflow.com/a/27254437
341 # [2] How to pass an array as function argument? https://askubuntu.com/a/674347
342 local return_state input arg1 string_test
343 declare -a arg2 array_test
344 input
=("$@") # See [2]
346 arg2
=("${input[@]:1}");
347 #yell "DEBUG:input:${input[@]}";
348 #yell "DEBUG:arg1:${arg1[@]}";
349 #yell "DEBUG:arg2:${arg2[@]}";
352 array_test
=("${arg2[@]}");
354 #yell "DEBUG:string_test:$string_test";
355 #yell "DEBUG:$(declare -p array_test)";
356 for element
in "${array_test[@]}"; do
357 #yell "DEBUG:element:$element";
358 if [[ "$element" =~ ^
"$string_test" ]]; then
365 if [[ $return_state == "true" ]]; then
370 } # Check if string is element in array
373 # Input: arg1: path to source tree
374 # arg2: path to destination tree
375 # arg3: cumulative duration (seconds) of audio files in destination tree
376 # arg4: cumulative size (bytes) of audio files in destination tree (optional)
377 # assoc arrays: appRollCall, fileRollCall, dirRollCall
378 # env.var: BKSHUF_PARAM_LINEC (bkshuf)
379 # BKSHUF_PARAM_GSIZE (bkshuf)
380 # arrays: music_codecs
381 # vars: max_filename_length, min_file_duration, max_file_duration,
382 # min_file_size, max_file_size, siz_dest, max_find_depth
384 # Depends: yell(), checkdir() 0.1.2, displayMissing() 1.0.0, GNU Coreutils 8.30
385 # BK-2020-03: bkshuf v0.1.0
386 local arg1 arg2 arg3 dur_dest dir_source dir_dest
387 declare -a list_files
# array for files to be considered
388 declare -a list_copy
# array for files to be copied (string: "$dur,$fsize,$path")
395 if ! { [[ $# -eq 3 ]] ||
[[ $# -eq 4 ]]; }; then
397 die
"ERROR:Invalid number of args:$#"; fi;
400 if ! checkInt
"$BKSHUF_PARAM_LINEC"; then
401 die
"FATAL:Not an int:BKSHUF_PARAM_LINEC:$BKSHUF_PARAM_LINEC"; fi;
402 if ! checkInt
"$BKSHUF_PARAM_GSIZE"; then
403 die
"FATAL:Not an int:BKSHUF_PARAM_LINEC:$BKSHUF_PARAM_GSIZE"; fi;
406 if checkInt
"$arg3"; then
409 die
"FATAL:Duration (seconds) not an int:$arg3"
413 if [[ -n "$arg4" ]]; then
414 if checkInt
"$arg4"; then
417 die
"FATAL:Size (bytes) not an int:$arg4";
422 if checkdir
"$arg1" "$arg2"; then
426 yell
"ERROR:Directory error";
430 checkapp ffprobe bkshuf
;
432 if ! displayMissing
; then
434 die
"ERROR:Check missing resources.";
437 yell
"STATUS:Working...";
439 # Populate list_files array
440 while read -r line
; do
441 list_files
+=("$line");
442 done < <(find -L "$dir_source" -maxdepth "$max_find_depth" -type f | \
443 grep -Ev "$ext_ignore" | \
446 # Test and add random elements of list_files to list_copy
447 dur
=0; # Initialize duration
448 siz
=0; # Initialize size
449 n
=0; # Initialize loop counter
450 dur_cand_w
=1; # Init duration digit width counter
451 siz_cand_w
=1; # Init size digit width counter
452 ## Get element count of list_files array
453 file_count
="${#list_files[@]}";
454 while read -r line
&& \
455 [[ $dur -le $
((dur_dest
* 95 / 100)) ]] && \
456 [[ $siz -le $
((siz_dest
* 95 / 100)) ]] && \
457 [[ $n -le $file_count ]]; do
460 yell
"DEBUG:list_copy building loop:$n/$file_count"; # debug
461 printf "DEBUG:%8d,%8d,%8d/%8d,%8d/%8d\n" "$dur_cand" "$siz_cand" "$dur" "$dur_dest" "$siz" "$siz_dest"; # debug
463 path_candfile
="$line"; # path of candidate file
466 siz_cand
="$(du -b "$path_candfile" | awk '{ print $1 }')"; # size in bytes
467 if ! checkInt
"$siz_cand"; then continue; fi; # reject
468 if [[ "$((siz + siz_cand))" -gt "$siz_dest" ]]; then continue; fi; # reject
469 if [[ "$siz_cand" -lt "$min_file_size" ]]; then continue; fi; # reject
470 if [[ "$siz_cand" -gt "$max_file_size" ]]; then continue; fi; # reject
472 ### Check if has valid codec
473 if ! check_parsable_audio_ffprobe
"$path_candfile"; then continue; fi; # reject
475 ### Check if desired codec
476 file_format
="$(get_audio_format "$path_candfile")";
477 if ! checkIsInArray
"$file_format" "${music_codecs[@]}"; then continue; fi; # reject
480 dur_cand
="$(get_media_length "$path_candfile")";
481 dur_cand
="${dur_cand%%.*}"; # convert float to int
482 if ! checkInt
"$dur_cand"; then continue; fi; # reject
483 if [[ "$((dur + dur_cand))" -gt "$dur_dest" ]]; then continue; fi; # reject
484 if [[ "$dur_cand" -lt "$min_file_duration" ]]; then continue; fi; # reject
485 if [[ "$dur_cand" -gt "$max_file_duration" ]]; then continue; fi; # reject
487 ### Update stats digits widths
489 dur_cand_wnow
="$(printf "%s
" "$dur_cand" | wc -m)"; # duration width count
490 if [[ $dur_cand_wnow -gt $dur_cand_w ]]; then
491 dur_cand_w
="$dur_cand_wnow"; fi;
493 siz_cand_wnow
="$(printf "%s
" "$siz_cand" | wc -m)"; # size width count
494 if [[ $siz_cand_wnow -gt $siz_cand_w ]]; then
495 siz_cand_w
="$siz_cand_wnow"; fi;
497 ### Add/update candfile to array:
498 ### list_copy (array with "duration, size, path")
499 #yell "DEBUG:Adding $path_candfile";
500 printf "DEBUG:%8d,%8d,%s\n" "$dur_cand" "$siz_cand" "$path_candfile" 1>&2;
501 #printf "DEBUG:dur:%s\n" "$dur" 1>&2;
502 #printf "DEBUG:siz:%s\n" "$siz" 1>&2;
503 list_copy
+=("$dur_cand,$siz_cand,$path_candfile"); # for copying with order
505 ### Update total duration $dur and total size $siz
506 dur
="$((dur + dur_cand))";
507 siz
="$((siz + siz_cand))";
508 yell
"DEBUG:dur:$dur";
509 yell
"DEBUG:siz:$siz";
510 done < <(printf "%s\n" "${list_files[@]}" | bkshuf
);
512 #yell "DEBUG:BKSHUF_PARAM_LINEC:$BKSHUF_PARAM_LINEC";
513 #yell "DEBUG:BKSHUF_PARAM_GSIZE:$BKSHUF_PARAM_GSIZE";
515 n
=0; # Initialize loop counter
516 num_w
="$(printf "%s
" "${#list_copy[@]}" | wc -m)"; # init file number format
517 num_fmt
="%0""$num_w""d";
518 path_log_output
="$dir_dest"/COPY.log
;
519 printf "num,fingerprint,duration,size,original_path\n" >> "$path_log_output";
520 # Copy files in list_copy to dir_dest;
521 while read -r line
; do
522 #yell "DEBUG:line:$line"; # debug
523 fdur
="$(printf "%s
" "$line" | cut -d',' -f1)";
524 fsize
="$(printf "%s
" "$line" | cut -d',' -f2)";
525 fpath
="$(printf "%s
" "$line" | cut -d',' -f3-)";
526 ## Get basename of path
527 file_basename
="$(basename "$fpath")";
528 ### Get basename without unprintable non-ASCII characters
529 file_basename_compat
="$(printf "%s
" "$file_basename" | tr -dc '[:graph:][:space:]' )";
531 ## Get 16-character b2sum fingerprint (for different files that share basename)
532 fingerprint
="$(b2sum -l32 "$fpath" | awk '{print $1}' )";
534 ## Form output filename
535 num
="$(printf "$num_fmt" "$n")";
536 file_name
="$num"_
"$fingerprint"..
"$file_basename_compat";
537 file_name
="${file_name:0:$max_filename_length}"; # Limit filename length (e.g. Windows has max of 255 characters)
540 path_output
="$dir_dest"/"$file_name";
543 must
cp "$fpath" "$path_output" && yell
"NOTICE:Copied ($(printf "%""$dur_cand_w"d "$fdur") seconds): $fpath ";
544 #yell "DEBUG:Copied $file_basename to $dur_dest.";
547 fpath_can
="$(readlink -f "$fpath")"; # resolve symlinks to canonical path
548 log_fmt
="%s,%s,%""$dur_cand_w""d,%""$siz_cand_w""d,%s\n"; # e.g. "%s,%3d,%5d,%s" if dur_cand_w=3 and siz_cand_w=5
549 printf "$log_fmt" "$num" "$fingerprint" "$fdur" "$fsize" "$fpath_can" >> "$path_log_output";
552 unset file_basename file_basename_compat path_output
;
553 done < <(printf "%s\n" "${list_copy[@]}");
555 # Report total duration and size
556 yell
"NOTICE:Total duration (seconds):$dur";
557 yell
"NOTICE:Total size (bytes):$siz";
563 # Author: Steven Baltakatei Sandoval
567 # Author: Steven Baltakatei Sandoval
569 # URL: https://gitlab.com/baltakatei/baltakatei-exdev/-/blob/b9e8b771e985fcdf26ba8b9ccb8e31b62da757d3/unitproc/bkshuf