feat(user/bk-copy-rand-music):Clump output with bkshuf
[BK-2020-03.git] / user / bk-copy-rand-music
... / ...
CommitLineData
1#!/usr/bin/env bash
2# Desc: Copies random audio files
3# Usage: bk-copy-rand-music [dir SOURCE] [dir DEST] [int DURATION] ([int BYTES])
4# Version: 0.1.0
5# Depends: BK-2020-03: bkshuf v0.1.0
6
# Roll-call tables (assoc. arrays) recording the status of apps, files, and dirs
declare -Ag appRollCall fileRollCall dirRollCall

# Adjustable parameters
## Whitelist of codec_name values ffprobe might return (e.g. "aac" "mp3")
declare -a music_codecs=(vorbis aac mp3 flac opus)
## Per-file limits
max_filename_length=255   # max output filename length
min_file_duration=10      # minimum duration per music file
max_file_duration=3600    # maximum duration per music file
min_file_size=100000      # minimum size per music file (bytes)
max_file_size=100000000   # maximum size per music file (bytes)
## Whole-run limits
siz_dest=600000000        # default destination size limit: 600 MB
max_find_depth=10         # max find depth
21
22
# Print script path and all args to stderr.
yell() {
    printf '%s\n' "$0: $*" >&2
}

# Same as yell(), then terminate the script with exit status 111.
die() {
    yell "$*"
    exit 111
}

# Run the args as a command; if it fails, die() reporting the command line.
must() {
    if ! "$@"; then
        die "cannot $*"
    fi
}
checkapp() {
    # Desc: If arg is a command, save result in assoc array 'appRollCall'
    # Usage: checkapp arg1 arg2 arg3 ...
    # Version: 0.1.1
    # Input: global assoc. array 'appRollCall'
    # Output: adds/updates key(value) to global assoc array 'appRollCall';
    #         an empty arg is recorded under the key "(Unspecified Appname(s))"
    # Output: returns 0 if every arg is a command; 1 otherwise (also 1 if no args)
    # Depends: bash 5.0.3
    local arg returnState   # 'arg' is local so the caller's variables are not clobbered

    #===Process Args===
    for arg in "$@"; do
        if [[ -z "$arg" ]]; then
            # An empty string is not a valid assoc. array subscript
            appRollCall["(Unspecified Appname(s))"]="false"; returnState="false"
        elif command -v "$arg" 1>/dev/null 2>&1; then # Check if arg is a valid command
            appRollCall["$arg"]="true"
            # A single earlier failure must stay sticky
            if [[ "$returnState" != "false" ]]; then returnState="true"; fi
        else
            appRollCall["$arg"]="false"; returnState="false"
        fi
    done

    #===Determine function return code===
    if [[ "$returnState" == "true" ]]; then
        return 0
    else
        return 1
    fi
} # Check that app exists
checkfile() {
    # Desc: If arg is a file path, save result in assoc array 'fileRollCall'
    # Usage: checkfile arg1 arg2 arg3 ...
    # Version: 0.1.1
    # Input: global assoc. array 'fileRollCall'
    # Output: adds/updates key(value) to global assoc array 'fileRollCall';
    #         an empty arg is recorded under the key "(Unspecified Filename(s))"
    # Output: returns 0 if all args are files; 1 otherwise (also 1 if no args)
    # Depends: bash 5.0.3
    local arg returnState   # 'arg' is local so the caller's variables are not clobbered

    #===Process Args===
    for arg in "$@"; do
        if [[ -z "$arg" ]]; then
            # An empty string is not a valid assoc. array subscript
            fileRollCall["(Unspecified Filename(s))"]="false"; returnState="false"
        elif [[ -f "$arg" ]]; then
            fileRollCall["$arg"]="true"
            # A single earlier failure must stay sticky
            if [[ "$returnState" != "false" ]]; then returnState="true"; fi
        else
            fileRollCall["$arg"]="false"; returnState="false"
        fi
    done

    #===Determine function return code===
    if [[ "$returnState" == "true" ]]; then
        return 0
    else
        return 1
    fi
} # Check that file exists
checkdir() {
    # Desc: If arg is a dir path, save result in assoc array 'dirRollCall'
    # Usage: checkdir arg1 arg2 arg3 ...
    # Version 0.1.2
    # Input: global assoc. array 'dirRollCall'
    # Output: adds/updates key(value) to global assoc array 'dirRollCall';
    #         an empty arg is recorded under the key "(Unspecified Dirname(s))"
    # Output: returns 0 if all args are dirs; 1 otherwise (also 1 if no args)
    # Depends: Bash 5.0.3
    local arg returnState   # 'arg' is local so the caller's variables are not clobbered

    #===Process Args===
    for arg in "$@"; do
        if [[ -z "$arg" ]]; then
            # An empty string is not a valid assoc. array subscript
            dirRollCall["(Unspecified Dirname(s))"]="false"; returnState="false"
        elif [[ -d "$arg" ]]; then
            dirRollCall["$arg"]="true"
            # A single earlier failure must stay sticky
            if [[ "$returnState" != "false" ]]; then returnState="true"; fi
        else
            dirRollCall["$arg"]="false"; returnState="false"
        fi
    done

    #===Determine function return code===
    if [[ "$returnState" == "true" ]]; then
        return 0
    else
        return 1
    fi
} # Check that dir exists
_displayMissing_report() {
    # Desc: Reports the keys of one roll-call array whose value is "false"
    # Usage: _displayMissing_report [str LABEL] [name ASSOC_ARRAY]
    # Input: arg1: line prefix (e.g. "Missing apps :")
    #        arg2: NAME of a global assoc. array (passed by name, not by value)
    # Output: stderr: LABEL followed by each missing key and a trailing space
    #                 (printed only if at least one key is missing)
    # Output: returns 1 if something is missing; 0 otherwise
    # Depends: bash 4.3+ (nameref)
    local label="$1"
    local -n _dm_roll="$2"   # unusual name avoids a circular nameref with the caller's array
    local key report found

    report="$label"
    for key in "${!_dm_roll[@]}"; do
        if [[ "${_dm_roll["$key"]}" == "false" ]]; then
            report+="$key "
            found="true"
        fi
    done

    if [[ "$found" == "true" ]]; then
        printf '%s\n' "$report" 1>&2
        return 1
    fi
    return 0
} # Helper of displayMissing()
displayMissing() {
    # Desc: Displays missing apps, files, and dirs
    # Usage: displayMissing
    # Version 1.0.0
    # Input: associative arrays: appRollCall, fileRollCall, dirRollCall
    # Output: stderr: messages indicating missing apps, file, or dirs
    # Output: returns exit code 0 if nothing missing; 1 otherwise
    # Depends: bash 5, _displayMissing_report(); arrays are filled by
    #          checkapp(), checkfile(), checkdir()
    local anyMissing="false"

    # Every category is always reported; a miss in one must not hide the others
    _displayMissing_report "Missing apps :" appRollCall  || anyMissing="true"
    _displayMissing_report "Missing files:" fileRollCall || anyMissing="true"
    _displayMissing_report "Missing dirs:"  dirRollCall  || anyMissing="true"

    #==Determine function return code===
    if [[ "$anyMissing" == "true" ]]; then
        return 1
    else
        return 0
    fi
} # Display missing apps, files, dirs
showUsage() {
    # Desc: Display script usage information
    # Usage: showUsage
    # Version 0.0.1
    # Input: none
    # Output: stdout
    # Depends: GNU-coreutils 8.30 (cat)
    # Note: The DEPENDENCIES list names bkshuf because main() refuses to run
    #       without it.
    cat <<'EOF'

    DESCRIPTION:
        This script may be used to copy a random selection of files containing
        audio tracks from SOURCE to DEST.

    USAGE:
        bk-copy-rand-music [dir SOURCE] [dir DEST] [int DURATION] (int BYTES)

    EXAMPLE:
        bk-copy-rand-music ~/Music /tmp/music-sample 3600
        bk-copy-rand-music ~/Music /tmp/music-sample 3600 680000000

    DEPENDENCIES:
        ffprobe
        bkshuf (BK-2020-03) v0.1.0
        GNU Coreutils 8.30
EOF
} # Display information on how to use this script.
check_parsable_audio_ffprobe() {
    # Desc: Checks if ffprobe returns valid audio codec name for file
    # Usage: check_parsable_audio_ffprobe [path FILE]
    # Version: 0.0.1
    # Input: arg1: file path
    # Output: exit code 0 if ffprobe exits successfully AND prints at least one
    #         audio stream codec name; 1 otherwise
    #         (ffprobe's own stderr is discarded; failure is reported via exit code)
    # Depends: ffprobe, die()
    local file_in ffprobe_out

    if [[ $# -ne 1 ]]; then die "ERROR:Invalid number of args:$#"; fi

    file_in="$1"

    # A single ffprobe run yields both signals: its exit status and the codec names
    if ! ffprobe_out="$(ffprobe -v error -select_streams a -show_entries stream=codec_name -of default=nokey=1:noprint_wrappers=1 "$file_in" 2>/dev/null)"; then
        return 1
    fi

    # Fail if ffprobe returns no result (i.e. no audio stream was reported)
    if [[ -z "$ffprobe_out" ]]; then
        return 1
    fi

    return 0
} # Checks if file has valid codec name using ffprobe
get_audio_format() {
    # Desc: Gets audio format of file as string
    # Usage: get_audio_format arg1
    # Depends: ffprobe
    # Version: 0.0.1
    # Input: arg1: input file path
    # Output: stdout (if valid audio format)
    #         exit code 0 if audio file; 1 otherwise
    #         (exit code 1 also if ffprobe itself fails, or if its output is not a
    #          single alphanumeric word, e.g. several audio streams on several lines)
    # Example: get_audio_format myvideo.mp4
    #   Note: Would return "opus" if full ffprobe report had 'Audio: opus, 48000 Hz, stereo, fltp'
    #   Note: Not tested with videos containing multiple video streams
    # Ref/Attrib: [1] https://stackoverflow.com/questions/5618363/is-there-a-way-to-use-ffmpeg-to-determine-the-encoding-of-a-file-before-transcod
    #             [2] https://stackoverflow.com/questions/44123532/how-to-find-out-the-file-extension-for-extracting-audio-tracks-with-ffmpeg-and-p#comment88464070_50723126
    local audio_format file_in pattern
    file_in="$1"

    # Get audio format; an ffprobe error exit is final and must not be
    # overridden by whatever text it happened to print. See [1].
    if ! audio_format="$(ffprobe -v error -select_streams a -show_entries stream=codec_name -of default=nokey=1:noprint_wrappers=1 "$file_in" 2>/dev/null)"; then
        return 1
    fi

    ## Return error if audio format is incorrectly formatted (e.g. reject if contains spaces)
    pattern='^[[:alnum:]]+$' # alphanumeric string with no spaces
    if [[ "$audio_format" =~ $pattern ]]; then
        # Report audio format
        printf '%s\n' "$audio_format"
        return 0
    fi

    return 1
} # Get audio format as stdout
get_media_length() {
    # Desc: Prints the media container length reported by ffprobe
    # Usage: get_media_length arg1
    # Input: arg1: path to file
    # Output: stdout: seconds (float), as printed by ffprobe
    #         exit code: that of ffprobe; script exits via die() if arg1 is not a file
    # Depends: ffprobe 4.1.8, die()
    # Ref/Attrib: [1] How to get video duration in seconds? https://superuser.com/a/945604
    local media_path="$1"

    # Guard clause: refuse anything that is not a regular file
    [[ -f "$media_path" ]] || die "ERROR:Not a file:$media_path"

    ffprobe -v error \
            -show_entries format=duration \
            -of default=noprint_wrappers=1:nokey=1 \
            "$media_path"
} # Get media container length in seconds via stdout
checkInt() {
    # Desc: Checks if arg is integer (one or more decimal digits; no sign)
    # Usage: checkInt arg
    # Input: arg: integer
    # Output: - return code 0 (if arg is integer)
    #         - return code 1 (if arg is not integer)
    #         - script exits via die() unless exactly one arg is given
    # Example: if ! checkInt $arg; then echo "not int"; fi;
    # Version: 0.0.1
    # Depends: die()
    local int_pattern   # kept local; nothing is written to the global namespace

    #===Process Arg===
    if [[ $# -ne 1 ]]; then
        die "ERROR:Invalid number of arguments:$#"
    fi

    int_pattern='^[0-9]+$' # Regular Expression to test

    #===Determine function return code===
    if [[ "$1" =~ $int_pattern ]]; then
        return 0
    else
        return 1
    fi
} # Checks if arg is integer
checkIsInArray() {
    # Desc: Checks if input arg is element in array
    # Usage: checkIsInArray arg1 arg2
    # Version: 0.0.1
    # Input: arg1: test string
    #        arg2: array (expanded by the caller; arrives as args 2..N)
    # Output: exit code 0 if test string is exactly equal to some array element;
    #         1 otherwise (also 1 if the array is empty)
    # Example: checkIsInArray "foo" "${myArray[@]}"
    # Note: The comparison is a literal string equality, not a regex/prefix match,
    #       so "mp" is not found in ("mp3") and "" is not found in ("mp3").
    # Ref/Attrib: [1] How do I check if variable is an array? https://stackoverflow.com/a/27254437
    #             [2] How to pass an array as function argument? https://askubuntu.com/a/674347
    local string_test element

    string_test="$1"

    # "${@:2}" is every arg after the test string; see [2]
    for element in "${@:2}"; do
        # Quoted right-hand side forces a literal (non-glob) comparison
        if [[ "$element" == "$string_test" ]]; then
            return 0
        fi
    done

    return 1
} # Check if string is element in array
main() {
    # Desc: Main program. Selects music files from the source tree in the order
    #   produced by bkshuf, keeping the running totals within the requested
    #   duration and size, copies the selection into the destination dir, and
    #   appends one record per copied file to DEST/COPY.log.
    # Input: arg1: path to source tree
    #        arg2: path to destination tree
    #        arg3: cumulative duration (seconds) of audio files in destination tree
    #        arg4: cumulative size (bytes) of audio files in destination tree (optional)
    #        assoc arrays: appRollCall, fileRollCall, dirRollCall
    #        env.var: BKSHUF_PARAM_LINEC
    #                 BKSHUF_PARAM_GSIZE
    #                 (not read here; presumably consumed by bkshuf - TODO confirm)
    #        arrays: music_codecs
    #        vars: max_filename_length, min_file_duration, max_file_duration,
    #              min_file_size, max_file_size, siz_dest, max_find_depth
    # Output: files: DEST/<num>_<fingerprint>..<basename> for each selected file;
    #                DEST/COPY.log (header and records are appended)
    #         stderr: status, notice, and error messages
    #         vars: global siz_dest is overwritten if arg4 is given
    #         exit code 111 via die() on bad args or missing resources
    # Depends: yell(), die(), must(), showUsage(), checkInt(), checkapp(),
    #          checkdir() 0.1.2, displayMissing() 1.0.0,
    #          check_parsable_audio_ffprobe(), get_audio_format(),
    #          checkIsInArray(), get_media_length(), GNU Coreutils 8.30
    #          BK-2020-03: bkshuf v0.1.0
    local arg1 arg2 arg3 arg4 dur_dest dir_source dir_dest
    local dur siz n dur_cand_w siz_cand_w file_count
    local path_candfile file_format dur_cand siz_cand
    local copy_count num_w num_fmt log_fmt path_log_output
    local line rest fdur fsize fpath file_basename fingerprint
    local num file_name path_output fpath_can
    local -a list_files=()   # array for files to be considered
    local -a list_copy_sa=() # simple array for files to be copied (string: "$dur,$siz,$path")

    # Parse args
    if [[ $# -ne 3 && $# -ne 4 ]]; then showUsage; die "ERROR:Invalid number of args:$#"; fi
    arg1="$1"
    arg2="$2"
    arg3="$3"
    arg4="${4:-}"

    ## Check duration (fatal: every later comparison depends on it)
    if checkInt "$arg3"; then
        dur_dest="$((10#$arg3))" # force base 10 so a leading zero is not read as octal
    else
        die "ERROR:Duration (seconds) not an int:$arg3"
    fi

    ## Check size (fatal: silently falling back to the default would ignore the request)
    if [[ -n "$arg4" ]]; then
        if checkInt "$arg4"; then
            siz_dest="$((10#$arg4))"
        else
            die "ERROR:Size (bytes) not an int:$arg4"
        fi
    fi

    ## Check directories (a failure is recorded in dirRollCall and reported below)
    if checkdir "$arg1" "$arg2"; then
        dir_source="$arg1"
        dir_dest="$arg2"
    else
        yell "ERROR:Directory error"
    fi

    ## Check apps
    checkapp ffprobe bkshuf

    if ! displayMissing; then
        showUsage
        die "ERROR:Check missing resources."
    fi

    yell "STATUS:Working..."

    # Populate list_files array (one path per line, sorted)
    mapfile -t list_files < <(find -L "$dir_source" -maxdepth "$max_find_depth" -type f | sort)

    # Test and add random elements of list_files to list_copy_sa
    dur=0        # Initialize duration
    siz=0        # Initialize size
    n=0          # Initialize loop counter
    dur_cand_w=1 # Init duration digit width counter
    siz_cand_w=1 # Init size digit width counter
    ## Get element count of list_files array
    file_count="${#list_files[@]}"
    while IFS= read -r path_candfile \
            && (( dur <= dur_dest && siz <= siz_dest && n <= file_count )); do
        ### printf emits one empty line when list_files is empty; skip it
        if [[ -z "$path_candfile" ]]; then continue; fi

        ### Check if has valid codec
        if ! check_parsable_audio_ffprobe "$path_candfile"; then continue; fi # reject

        ### Check if desired codec (an unreadable or empty format is rejected outright)
        if ! file_format="$(get_audio_format "$path_candfile")"; then continue; fi # reject
        if [[ -z "$file_format" ]]; then continue; fi # reject
        if ! checkIsInArray "$file_format" "${music_codecs[@]}"; then continue; fi # reject

        ### Check duration; validate BEFORE doing arithmetic with it
        dur_cand="$(get_media_length "$path_candfile")"
        dur_cand="${dur_cand%%.*}" # convert float to int
        if ! checkInt "$dur_cand"; then continue; fi # reject
        dur_cand="$((10#$dur_cand))"
        if (( dur_cand < min_file_duration )); then continue; fi # reject
        if (( dur_cand > max_file_duration )); then continue; fi # reject
        if (( dur + dur_cand > dur_dest )); then continue; fi # reject

        ### Check size; the running total must never exceed the destination limit
        siz_cand="$(du -b "$path_candfile" | awk '{ print $1 }')" # size in bytes
        if ! checkInt "$siz_cand"; then continue; fi # reject
        if (( siz_cand < min_file_size )); then continue; fi # reject
        if (( siz_cand > max_file_size )); then continue; fi # reject
        if (( siz + siz_cand > siz_dest )); then continue; fi # reject

        ### Track widest duration/size among accepted files (for log alignment)
        if (( ${#dur_cand} > dur_cand_w )); then dur_cand_w="${#dur_cand}"; fi
        if (( ${#siz_cand} > siz_cand_w )); then siz_cand_w="${#siz_cand}"; fi

        ### Add candfile to array list_copy_sa
        list_copy_sa+=("$dur_cand,$siz_cand,$path_candfile") # for copying with order

        ### Update total duration $dur and total size $siz
        dur="$((dur + dur_cand))"
        siz="$((siz + siz_cand))"
        n="$((n + 1))"
    done < <(printf "%s\n" "${list_files[@]}" | bkshuf)

    n=0 # Initialize loop counter
    copy_count="${#list_copy_sa[@]}"
    num_w="${#copy_count}" # digits needed to number every copied file
    num_fmt="%0${num_w}d"
    # e.g. "%s,%s,%3d,%5d,%s\n" if dur_cand_w=3 and siz_cand_w=5; built from integers only
    log_fmt="%s,%s,%${dur_cand_w}d,%${siz_cand_w}d,%s\n"
    path_log_output="$dir_dest"/COPY.log
    printf "num,fingerprint,duration,size,original_path\n" >> "$path_log_output"

    # Copy files in list_copy_sa to dir_dest
    # (iterating the array directly means an empty selection copies nothing)
    for line in "${list_copy_sa[@]}"; do
        yell "DEBUG:line:$line" # debug
        ## Split "duration,size,path"; the path keeps any commas of its own
        fdur="${line%%,*}"
        rest="${line#*,}"
        fsize="${rest%%,*}"
        fpath="${rest#*,}"

        ## Get basename of path
        file_basename="${fpath##*/}"

        ## Get 8-hex-character (32-bit) b2sum fingerprint (for different files that share basename)
        ## Reading via stdin keeps b2sum's filename escaping out of the digest field
        fingerprint="$(b2sum -l32 < "$fpath" | awk '{print $1}')"

        ## Form output filename
        printf -v num "$num_fmt" "$n"
        file_name="${num}_${fingerprint}..${file_basename}"
        file_name="${file_name:0:$max_filename_length}" # Limit filename length (e.g. Windows has max of 255 characters)

        ## Form output path
        path_output="$dir_dest"/"$file_name"

        ## Copy
        must cp "$fpath" "$path_output" && yell "NOTICE:Copied ($fdur seconds): $fpath "

        ## Append log
        fpath_can="$(readlink -f "$fpath")" # resolve symlinks to canonical path
        printf "$log_fmt" "$num" "$fingerprint" "$fdur" "$fsize" "$fpath_can" >> "$path_log_output"

        n="$((n + 1))"
    done

    # Report total duration and size
    yell "NOTICE:Total duration (seconds):$dur"
    yell "NOTICE:Total size (bytes):$siz"

} # Main program
528
529main "$@";
530
531# Author: Steven Baltakatei Sandoval
532# License: GPLv3+