From: Steven Baltakatei Sandoval Date: Sat, 18 Feb 2023 15:31:56 +0000 (+0000) Subject: feat(user/bk-copy-rand-music):Clump output with bkshuf X-Git-Tag: 0.8.1~8 X-Git-Url: https://zdv2.bktei.com/gitweb/BK-2020-03.git/commitdiff_plain/b9e8b771e985fcdf26ba8b9ccb8e31b62da757d3?ds=inline feat(user/bk-copy-rand-music):Clump output with bkshuf - feat(unitproc/bkshuf):Utilize environment variables if available - doc(.../bkshuf/article.tm):update TeXmacs article --- diff --git a/doc/unitproc/bkshuf/article.tm b/doc/unitproc/bkshuf/article.tm index 46f2c0e..3160815 100644 --- a/doc/unitproc/bkshuf/article.tm +++ b/doc/unitproc/bkshuf/article.tm @@ -6,7 +6,7 @@ |>>||> + Sandoval>>>||> @@ -204,11 +204,11 @@ size trends towards . <\eqnarray*> - >>||>|)>>>|>>||>>>|||>>=>>>>|||>>>>|>>||>>|>-1>||>>|>>||>|)>>>>>|>>||>|)>>>>>|||>|>>|)>>||,0>|)>-1|,0>|)>|]>>|)>\>|)>|]>+1|]>>>>>> + >>||>|)>>>|>>||>>>|||>>=>>>>|||>>>>|>>||>>|>-1>||>>|>>||>|)>>>>|>>||>|)>>>>|||>|>>|)>>||,0>|)>-1|,0>|)>|]>>|)>\>|)>|]>+1|]>>>>> @@ -280,14 +280,14 @@ <\references> <\collection> > - > + > > > > > > - > - > + > + > > > > @@ -299,6 +299,9 @@ > > > + > + > + > > > > diff --git a/unitproc/bkshuf b/unitproc/bkshuf index 4bf9f99..8aaac9d 100644 --- a/unitproc/bkshuf +++ b/unitproc/bkshuf @@ -1,12 +1,15 @@ #!/usr/bin/env bash # Desc: Mixes input lines while also preserving some neighbors # Usage: cat file | bkshuf arg1 -# Version 0.0.1 +# Version 0.1.0 # Depends: bc 1.07.1, GNU Coreutils 8.32 (shuf) # Input: var: arg1 initial lines to output -BKSHUF_PARAM_LINEC=1000000; -BKSHUF_PARAM_GSIZE=25 # lines per group for BKSHUF_PARAM_LINEC lines of input +# Load env vars +## For these numbers of lines of input... +if [[ ! -v BKSHUF_PARAM_LINEC ]]; then BKSHUF_PARAM_LINEC=1000000; fi; +## ... target this group size. +if [[ ! -v BKSHUF_PARAM_GSIZE ]]; then BKSHUF_PARAM_GSIZE=25; fi; yell() { echo "$0: $*" >&2; } # print script path and all args to stderr diff --git a/user/bk-copy-rand-music.sh b/user/bk-copy-rand-music old mode 100755 new mode 100644 similarity index 74% rename from user/bk-copy-rand-music.sh rename to user/bk-copy-rand-music index ea384fe..18f95f3 --- a/user/bk-copy-rand-music.sh +++ b/user/bk-copy-rand-music @@ -1,7 +1,8 @@ #!/usr/bin/env bash # Desc: Copies random audio files -# Usage: bk-copy-rand-music.sh [dir SOURCE] [dir DEST] [int DURATION] -# Version: 0.0.3 +# Usage: bk-copy-rand-music [dir SOURCE] [dir DEST] [int DURATION] ([int BYTES]) +# Version: 0.1.0 +# Depends: BK-2020-03: bkshuf v0.1.0 declare -Ag appRollCall # Associative array for storing app status declare -Ag fileRollCall # Associative array for storing file status @@ -10,13 +11,18 @@ declare -a music_codecs # Array for storing valid codec names (e.g. "aac" "mp3") # Adjustable parameters music_codecs=("vorbis" "aac" "mp3" "flac" "opus"); # whitelist of valid codec_names ffprobe might return -max_loops="1000000"; # max number of files to test whether are audio or not max_filename_length="255"; # max output filename length min_file_duration="10"; # minimum duration per music file +max_file_duration="3600"; # maximum duration per music file +min_file_size="100000"; # minimum size per music file (bytes) +max_file_size="100000000"; # maximum size per music file (bytes) +siz_dest="600000000"; # default destination size limit: 600 MB +max_find_depth="10"; # max find depth + yell() { echo "$0: $*" >&2; } # print script path and all args to stderr die() { yell "$*"; exit 111; } # same as yell() but non-zero exit status -try() { "$@" || die "cannot $*"; } # runs args as command, reports args if command fails +must() { "$@" || die "cannot $*"; } # runs args as command, reports args if command fails checkapp() { # Desc: If arg is a command, save result in assoc array 'appRollCall' # Usage: checkapp arg1 arg2 arg3 ... @@ -185,10 +191,11 @@ showUsage() { audio tracks from SOURCE to DEST. USAGE: - bk-copy-rand-music [dir SOURCE] [dir DEST] [int DURATION] + bk-copy-rand-music [dir SOURCE] [dir DEST] [int DURATION] (int BYTES) EXAMPLE: bk-copy-rand-music ~/Music /tmp/music-sample 3600 + bk-copy-rand-music ~/Music /tmp/music-sample 3600 680000000 DEPENDENCIES: ffprobe @@ -358,18 +365,26 @@ main() { # Input: arg1: path to source tree # arg2: path to destination tree # arg3: cumulative duration (seconds) of audio files in destination tree + # arg4: cumulative size (bytes) of audio files in destination tree (optional) # assoc arrays: appRollCall, fileRollCall, dirRollCall + # env.var: BKSHUF_PARAM_LINEC + # BKSHUF_PARAM_GSIZE + # arrays: music_codecs + # vars: max_filename_length, min_file_duration, max_file_duration, + # min_file_size, max_file_size, siz_dest, max_find_depth # Output: [none] - # Depends: yell(), checkdir() 0.1.2, displayMissing() 1.0.0, GNU Coreutils 8.30 (shuf) - local arg1 arg2 arg3 dur_dest dir_source dir_dest list_all + # Depends: yell(), checkdir() 0.1.2, displayMissing() 1.0.0, GNU Coreutils 8.30 + # BK-2020-03: bkshuf v0.1.0 + local arg1 arg2 arg3 dur_dest dir_source dir_dest declare -a list_files # array for files to be considered - declare -A list_copy # assoc array for files to be copied (key=path; value=duration) + declare -a list_copy_sa # simple array for files to be copied (string: "$dur,$path") # Parse args arg1="$1"; arg2="$2"; arg3="$3"; - if [[ $# -ne 3 ]]; then showUsage; die "ERROR:Invalid number of args."; fi; + arg4="$4"; + if ! ([[ $# -eq 3 ]] || [[ $# -eq 4 ]]); then showUsage; die "ERROR:Invalid number of args:$#"; fi; ## Check duration if checkInt "$arg3"; then @@ -377,6 +392,15 @@ main() { else yell "ERROR:Duration (seconds) not an int:$arg3" fi; + + ## Check size + if [[ -n "$arg4" ]]; then + if checkInt "$arg4"; then + siz_dest="$arg4"; + else + yell "ERROR:Size (bytes) not an int:$arg4"; + fi; + fi; ## Check directories if checkdir "$arg1" "$arg2"; then @@ -387,7 +411,7 @@ main() { fi; ## Check apps - checkapp ffprobe; + checkapp ffprobe bkshuf; if ! displayMissing; then showUsage; @@ -395,33 +419,26 @@ main() { fi; yell "STATUS:Working..."; - - # Generate file path list - list_all="$(find -L "$dir_source")"; - #yell "DEBUG:list_files_rel:$list_files_rel"; - # Prune list_all of non-files and save as array list_files + # Populate list_files array while read -r line; do - #yell "DEBUG:line:$line"; - if ! [[ -f $line ]]; then - #yell "DEBUG:Not a file:$line"; - #yell ""; # debug - continue; - fi; - list_files+=("$line"); - done < <(echo "$list_all"); + list_files+=("$line"); + done < <(find -L "$dir_source" -maxdepth "$max_find_depth" -type f | sort); - # Randomly test and add elements of list_files array to list_copy + # Test and add random elements of list_files to list_copy dur=0; # Initialize duration + siz=0; # Initialize size n=0; # Initialize loop counter + dur_cand_w=1; # Init duration digit width counter + siz_cand_w=1; # Init size digit width counter ## Get element count of list_files array - list_files_count="${#list_files[@]}"; - while [[ $dur -le $dur_dest ]]; do - #yell "DEBUG:list_copy building loop:$n"; - ### Select random element of list_files array - list_files_index="$(shuf -i 1-"$list_files_count" -n1)"; - list_files_index="$((list_files_index - 1))"; # bash arrays are zero-indexed - path_candfile="${list_files[$list_files_index]}"; # path of candidate file + file_count="${#list_files[@]}"; + while read -r line && \ + [[ $dur -le $dur_dest ]] && \ + [[ $siz -le $siz_dest ]] && \ + [[ $n -le $file_count ]]; do + #yell "DEBUG:list_copy building loop:$n"; + path_candfile="$line"; # path of candidate file ### Check if has valid codec if ! check_parsable_audio_ffprobe "$path_candfile"; then continue; fi; # reject @@ -433,60 +450,79 @@ main() { ### Check and save duration dur_cand="$(get_media_length "$path_candfile")"; dur_cand="${dur_cand%%.*}"; # convert float to int + if [[ "$((dur + dur_cand))" -gt "$dur_dest" ]]; then continue; fi; # reject + dur_cand_wnow="$(printf "%s" "$dur_cand" | wc -m)"; # duration width count + if [[ $dur_cand_wnow -gt $dur_cand_w ]]; then + dur_cand_w="$dur_cand_wnow"; fi; if ! checkInt "$dur_cand"; then continue; fi; # reject if [[ "$dur_cand" -lt "$min_file_duration" ]]; then continue; fi; # reject - - ### Add/update candfile to list_copy assoc. array (key=path; value=duration) + if [[ "$dur_cand" -gt "$max_file_duration" ]]; then continue; fi; # reject + + ### Check and save size + siz_cand="$(du -b "$path_candfile" | awk '{ print $1 }')"; # size in bytes + siz_cand_wnow="$(printf "%s" "$siz_cand" | wc -m)"; # size width count + if [[ $siz_cand_wnow -gt $siz_cand_w ]]; then + siz_cand_w="$siz_cand_wnow"; fi; + if ! checkInt "$siz_cand"; then continue; fi; # reject + if [[ "$siz_cand" -lt "$min_file_size" ]]; then continue; fi; # reject + if [[ "$siz_cand" -gt "$max_file_size" ]]; then continue; fi; # reject + + ### Add/update candfile to array: + ### list_copy_sa (simple array with only paths) #yell "DEBUG:Adding $path_candfile"; - list_copy["$path_candfile"]="$dur_cand"; + list_copy_sa+=("$dur_cand,$siz_cand,$path_candfile"); # for copying with order - ### Update total duration $dur by summing all list_copy assoc. array values - dur=0; - for value in "${list_copy[@]}"; do - dur="$((dur + value))"; - done; + ### Update total duration $dur and total size $siz + dur="$((dur + dur_cand))"; + siz="$((siz + siz_cand))"; #yell "DEBUG:dur:$dur"; + #yell "DEBUG:siz:$siz"; - ### Sanity check ((n++)); - if [[ $n -gt $max_loops ]]; then die "ERROR:Too many loops:$n"; fi; - done; + done < <(printf "%s\n" "${list_files[@]}" | bkshuf); n=0; # Initialize loop counter + num_w="$(printf "%s" "${#list_copy_sa[@]}" | wc -m)"; # init file number format + num_fmt="%0""$num_w""d"; + path_log_output="$dir_dest"/COPY.log; + printf "num,fingerprint,duration,size,original_path\n" >> "$path_log_output"; # Copy files in list_copy to dir_dest; - for key in "${!list_copy[@]}"; do - value="${list_copy[$key]}"; - ## Get basename of path - file_basename="$(basename "$key")"; - - ## Get 16-character b2sum fingerprint (for different files that share basename) - fingerprint="$(b2sum -l64 "$key" | cut -d' ' -f1)"; - - ## Form output filename - file_name="$fingerprint".."$file_basename"; + while read -r line; do + yell "DEBUG:line:$line"; # debug + fdur="$(printf "%s" "$line" | cut -d',' -f1)"; + fsize="$(printf "%s" "$line" | cut -d',' -f2)"; + fpath="$(printf "%s" "$line" | cut -d',' -f3-)"; + ## Get basename of path + file_basename="$(basename "$fpath")"; + + ## Get 16-character b2sum fingerprint (for different files that share basename) + fingerprint="$(b2sum -l32 "$fpath" | awk '{print $1}' )"; + + ## Form output filename + num="$(printf "$num_fmt" "$n")"; + file_name="$num"_"$fingerprint".."$file_basename"; file_name="${file_name:0:$max_filename_length}"; # Limit filename length (e.g. Windows has max of 255 characters) ## Form output path path_output="$dir_dest"/"$file_name"; ## Copy - try cp "$key" "$path_output" && yell "NOTICE:Copied ($value seconds): $key "; + must cp "$fpath" "$path_output" && yell "NOTICE:Copied ($fdur seconds): $fpath "; #yell "DEBUG:Copied $file_basename to $dur_dest."; ## Append log - path_log_output="$dir_dest"/COPY.log; - if [[ $n -le 0 ]]; then - echo "fingerprint","duration","original_path" >> "$path_log_output"; - else - echo "$fingerprint","$value","$key" >> "$path_log_output"; - fi; + fpath_can="$(readlink -f "$fpath")"; # resolve symlinks to canonical path + log_fmt="%s,%s,%""$dur_cand_w""d,%""$siz_cand_w""d,%s\n"; # e.g. "%s,%3d,%5d,%s" if dur_cand_w=3 and siz_cand_w=5 + #yell "DEBUG:log_fmt:$log_fmt"; sleep 10; # debug + printf "$log_fmt" "$num" "$fingerprint" "$fdur" "$fsize" "$fpath_can" >> "$path_log_output"; ((n++)); unset file_basename path_output - done; + done < <(printf "%s\n" "${list_copy_sa[@]}"); - # Report total duration + # Report total duration and size yell "NOTICE:Total duration (seconds):$dur"; + yell "NOTICE:Total size (bytes):$siz"; } # Main program