X-Git-Url: https://zdv2.bktei.com/gitweb/BK-2020-03.git/blobdiff_plain/b9e8b771e985fcdf26ba8b9ccb8e31b62da757d3..7e772cd7416dcaef1b0f9a8cd09ce5e8d34c05c9:/user/bk-copy-rand-music diff --git a/user/bk-copy-rand-music b/user/bk-copy-rand-music old mode 100644 new mode 100755 index 18f95f3..2303d50 --- a/user/bk-copy-rand-music +++ b/user/bk-copy-rand-music @@ -1,7 +1,7 @@ #!/usr/bin/env bash # Desc: Copies random audio files # Usage: bk-copy-rand-music [dir SOURCE] [dir DEST] [int DURATION] ([int BYTES]) -# Version: 0.1.0 +# Version: 0.3.0 # Depends: BK-2020-03: bkshuf v0.1.0 declare -Ag appRollCall # Associative array for storing app status @@ -11,14 +11,18 @@ declare -a music_codecs # Array for storing valid codec names (e.g. "aac" "mp3") # Adjustable parameters music_codecs=("vorbis" "aac" "mp3" "flac" "opus"); # whitelist of valid codec_names ffprobe might return +ext_ignore=".ots\$|.mid\$|.json\$|.gz\$|.jpg\$|.png\$|.asc\$|.pdf\$|.txt\$|.vtt\$|\.SUM|.zip\$|.xz\$|.org\$|.txt\$"; # blacklist of file extensions for 'grep -Evi' max_filename_length="255"; # max output filename length -min_file_duration="10"; # minimum duration per music file +min_file_duration="30"; # minimum duration per music file max_file_duration="3600"; # maximum duration per music file min_file_size="100000"; # minimum size per music file (bytes) max_file_size="100000000"; # maximum size per music file (bytes) siz_dest="600000000"; # default destination size limit: 600 MB max_find_depth="10"; # max find depth +# Load env vars (bkshuf defaults for typical music albums) +if [[ ! -v BKSHUF_PARAM_LINEC ]]; then export BKSHUF_PARAM_LINEC=1000000; fi; +if [[ ! -v BKSHUF_PARAM_GSIZE ]]; then export BKSHUF_PARAM_GSIZE=10; fi; yell() { echo "$0: $*" >&2; } # print script path and all args to stderr die() { yell "$*"; exit 111; } # same as yell() but non-zero exit status @@ -200,6 +204,10 @@ showUsage() { DEPENDENCIES: ffprobe GNU Coreutils 8.30 + + ENVIRONMENT VARIABLES + BKSHUF_PARAM_LINEC (see `bkshuf` in BK-2020-03) + BKSHUF_PARAM_GSIZE (see `bkshuf` in BK-2020-03) EOF } # Display information on how to use this script. check_parsable_audio_ffprobe() { @@ -367,8 +375,8 @@ main() { # arg3: cumulative duration (seconds) of audio files in destination tree # arg4: cumulative size (bytes) of audio files in destination tree (optional) # assoc arrays: appRollCall, fileRollCall, dirRollCall - # env.var: BKSHUF_PARAM_LINEC - # BKSHUF_PARAM_GSIZE + # env.var: BKSHUF_PARAM_LINEC (bkshuf) + # BKSHUF_PARAM_GSIZE (bkshuf) # arrays: music_codecs # vars: max_filename_length, min_file_duration, max_file_duration, # min_file_size, max_file_size, siz_dest, max_find_depth @@ -377,20 +385,28 @@ main() { # BK-2020-03: bkshuf v0.1.0 local arg1 arg2 arg3 dur_dest dir_source dir_dest declare -a list_files # array for files to be considered - declare -a list_copy_sa # simple array for files to be copied (string: "$dur,$path") + declare -a list_copy # array for files to be copied (string: "$dur,$fsize,$path") # Parse args arg1="$1"; arg2="$2"; arg3="$3"; arg4="$4"; - if ! ([[ $# -eq 3 ]] || [[ $# -eq 4 ]]); then showUsage; die "ERROR:Invalid number of args:$#"; fi; - + if ! { [[ $# -eq 3 ]] || [[ $# -eq 4 ]]; }; then + showUsage; + die "ERROR:Invalid number of args:$#"; fi; + + # Check env vars + if ! checkInt "$BKSHUF_PARAM_LINEC"; then + die "FATAL:Not an int:BKSHUF_PARAM_LINEC:$BKSHUF_PARAM_LINEC"; fi; + if ! checkInt "$BKSHUF_PARAM_GSIZE"; then + die "FATAL:Not an int:BKSHUF_PARAM_LINEC:$BKSHUF_PARAM_GSIZE"; fi; + ## Check duration if checkInt "$arg3"; then dur_dest="$arg3"; else - yell "ERROR:Duration (seconds) not an int:$arg3" + die "FATAL:Duration (seconds) not an int:$arg3" fi; ## Check size @@ -398,7 +414,7 @@ main() { if checkInt "$arg4"; then siz_dest="$arg4"; else - yell "ERROR:Size (bytes) not an int:$arg4"; + die "FATAL:Size (bytes) not an int:$arg4"; fi; fi; @@ -423,7 +439,9 @@ main() { # Populate list_files array while read -r line; do list_files+=("$line"); - done < <(find -L "$dir_source" -maxdepth "$max_find_depth" -type f | sort); + done < <(find -L "$dir_source" -maxdepth "$max_find_depth" -type f | \ + grep -Ev "$ext_ignore" | \ + sort); # Test and add random elements of list_files to list_copy dur=0; # Initialize duration @@ -434,12 +452,23 @@ main() { ## Get element count of list_files array file_count="${#list_files[@]}"; while read -r line && \ - [[ $dur -le $dur_dest ]] && \ - [[ $siz -le $siz_dest ]] && \ + [[ $dur -le $((dur_dest * 95 / 100)) ]] && \ + [[ $siz -le $((siz_dest * 95 / 100)) ]] && \ [[ $n -le $file_count ]]; do - #yell "DEBUG:list_copy building loop:$n"; + ((n++)); + + yell "DEBUG:list_copy building loop:$n/$file_count"; # debug + printf "DEBUG:%8d,%8d,%8d/%8d,%8d/%8d\n" "$dur_cand" "$siz_cand" "$dur" "$dur_dest" "$siz" "$siz_dest"; # debug + path_candfile="$line"; # path of candidate file + ### Check size + siz_cand="$(du -b "$path_candfile" | awk '{ print $1 }')"; # size in bytes + if ! checkInt "$siz_cand"; then continue; fi; # reject + if [[ "$((siz + siz_cand))" -gt "$siz_dest" ]]; then continue; fi; # reject + if [[ "$siz_cand" -lt "$min_file_size" ]]; then continue; fi; # reject + if [[ "$siz_cand" -gt "$max_file_size" ]]; then continue; fi; # reject + ### Check if has valid codec if ! check_parsable_audio_ffprobe "$path_candfile"; then continue; fi; # reject @@ -447,78 +476,81 @@ main() { file_format="$(get_audio_format "$path_candfile")"; if ! checkIsInArray "$file_format" "${music_codecs[@]}"; then continue; fi; # reject - ### Check and save duration + ### Check duration dur_cand="$(get_media_length "$path_candfile")"; dur_cand="${dur_cand%%.*}"; # convert float to int + if ! checkInt "$dur_cand"; then continue; fi; # reject if [[ "$((dur + dur_cand))" -gt "$dur_dest" ]]; then continue; fi; # reject - dur_cand_wnow="$(printf "%s" "$dur_cand" | wc -m)"; # duration width count - if [[ $dur_cand_wnow -gt $dur_cand_w ]]; then - dur_cand_w="$dur_cand_wnow"; fi; - if ! checkInt "$dur_cand"; then continue; fi; # reject if [[ "$dur_cand" -lt "$min_file_duration" ]]; then continue; fi; # reject if [[ "$dur_cand" -gt "$max_file_duration" ]]; then continue; fi; # reject - ### Check and save size - siz_cand="$(du -b "$path_candfile" | awk '{ print $1 }')"; # size in bytes + ### Update stats digits widths + #### duration + dur_cand_wnow="$(printf "%s" "$dur_cand" | wc -m)"; # duration width count + if [[ $dur_cand_wnow -gt $dur_cand_w ]]; then + dur_cand_w="$dur_cand_wnow"; fi; + #### size siz_cand_wnow="$(printf "%s" "$siz_cand" | wc -m)"; # size width count if [[ $siz_cand_wnow -gt $siz_cand_w ]]; then siz_cand_w="$siz_cand_wnow"; fi; - if ! checkInt "$siz_cand"; then continue; fi; # reject - if [[ "$siz_cand" -lt "$min_file_size" ]]; then continue; fi; # reject - if [[ "$siz_cand" -gt "$max_file_size" ]]; then continue; fi; # reject - + ### Add/update candfile to array: - ### list_copy_sa (simple array with only paths) + ### list_copy (array with "duration, size, path") #yell "DEBUG:Adding $path_candfile"; - list_copy_sa+=("$dur_cand,$siz_cand,$path_candfile"); # for copying with order + printf "DEBUG:%8d,%8d,%s\n" "$dur_cand" "$siz_cand" "$path_candfile" 1>&2; + #printf "DEBUG:dur:%s\n" "$dur" 1>&2; + #printf "DEBUG:siz:%s\n" "$siz" 1>&2; + list_copy+=("$dur_cand,$siz_cand,$path_candfile"); # for copying with order ### Update total duration $dur and total size $siz dur="$((dur + dur_cand))"; siz="$((siz + siz_cand))"; - #yell "DEBUG:dur:$dur"; - #yell "DEBUG:siz:$siz"; - - ((n++)); + yell "DEBUG:dur:$dur"; + yell "DEBUG:siz:$siz"; done < <(printf "%s\n" "${list_files[@]}" | bkshuf); + #yell "DEBUG:BKSHUF_PARAM_LINEC:$BKSHUF_PARAM_LINEC"; + #yell "DEBUG:BKSHUF_PARAM_GSIZE:$BKSHUF_PARAM_GSIZE"; + n=0; # Initialize loop counter - num_w="$(printf "%s" "${#list_copy_sa[@]}" | wc -m)"; # init file number format + num_w="$(printf "%s" "${#list_copy[@]}" | wc -m)"; # init file number format num_fmt="%0""$num_w""d"; path_log_output="$dir_dest"/COPY.log; printf "num,fingerprint,duration,size,original_path\n" >> "$path_log_output"; # Copy files in list_copy to dir_dest; while read -r line; do - yell "DEBUG:line:$line"; # debug + #yell "DEBUG:line:$line"; # debug fdur="$(printf "%s" "$line" | cut -d',' -f1)"; fsize="$(printf "%s" "$line" | cut -d',' -f2)"; fpath="$(printf "%s" "$line" | cut -d',' -f3-)"; ## Get basename of path file_basename="$(basename "$fpath")"; + ### Get basename without unprintable non-ASCII characters + file_basename_compat="$(printf "%s" "$file_basename" | tr -dc '[:graph:][:space:]' )"; ## Get 16-character b2sum fingerprint (for different files that share basename) fingerprint="$(b2sum -l32 "$fpath" | awk '{print $1}' )"; ## Form output filename num="$(printf "$num_fmt" "$n")"; - file_name="$num"_"$fingerprint".."$file_basename"; + file_name="$num"_"$fingerprint".."$file_basename_compat"; file_name="${file_name:0:$max_filename_length}"; # Limit filename length (e.g. Windows has max of 255 characters) ## Form output path path_output="$dir_dest"/"$file_name"; ## Copy - must cp "$fpath" "$path_output" && yell "NOTICE:Copied ($fdur seconds): $fpath "; + must cp "$fpath" "$path_output" && yell "NOTICE:Copied ($(printf "%""$dur_cand_w"d "$fdur") seconds): $fpath "; #yell "DEBUG:Copied $file_basename to $dur_dest."; ## Append log fpath_can="$(readlink -f "$fpath")"; # resolve symlinks to canonical path log_fmt="%s,%s,%""$dur_cand_w""d,%""$siz_cand_w""d,%s\n"; # e.g. "%s,%3d,%5d,%s" if dur_cand_w=3 and siz_cand_w=5 - #yell "DEBUG:log_fmt:$log_fmt"; sleep 10; # debug printf "$log_fmt" "$num" "$fingerprint" "$fdur" "$fsize" "$fpath_can" >> "$path_log_output"; ((n++)); - unset file_basename path_output - done < <(printf "%s\n" "${list_copy_sa[@]}"); + unset file_basename file_basename_compat path_output; + done < <(printf "%s\n" "${list_copy[@]}"); # Report total duration and size yell "NOTICE:Total duration (seconds):$dur"; @@ -530,3 +562,8 @@ main "$@"; # Author: Steven Baltakatei Sandoval # License: GPLv3+ + +# bkshuf v0.1.0 +# Author: Steven Baltakatei Sandoval +# License: GPLv3+ +# URL: https://gitlab.com/baltakatei/baltakatei-exdev/-/blob/b9e8b771e985fcdf26ba8b9ccb8e31b62da757d3/unitproc/bkshuf