#!/usr/bin/env bash
# Desc: Copies random audio files
# Usage: bk-copy-rand-music [dir SOURCE] [dir DEST] [int DURATION] ([int BYTES])
-# Version: 0.1.0
+# Version: 0.4.0
# Depends: BK-2020-03: bkshuf v0.1.0
declare -Ag appRollCall # Associative array for storing app status
declare -a music_codecs # Array for storing valid codec names (e.g. "aac" "mp3")
# Adjustable parameters
-music_codecs=("vorbis" "aac" "mp3" "flac" "opus"); # whitelist of valid codec_names ffprobe might return
+music_codecs=("vorbis" "aac" "mp3" "flac" "opus" "eac3"); # whitelist of valid codec_names ffprobe might return
+ext_ignore=".ots\$|.mid\$|.json\$|.gz\$|.jpg\$|.png\$|.asc\$|.pdf\$|.txt\$|.vtt\$|\.SUM|.zip\$|.xz\$|.org\$|.txt\$"; # blacklist of file extensions for 'grep -Evi'
max_filename_length="255"; # max output filename length
-min_file_duration="10"; # minimum duration per music file
+min_file_duration="30"; # minimum duration per music file
max_file_duration="3600"; # maximum duration per music file
min_file_size="100000"; # minimum size per music file (bytes)
max_file_size="100000000"; # maximum size per music file (bytes)
siz_dest="600000000"; # default destination size limit: 600 MB
max_find_depth="10"; # max find depth
+# Load env vars (bkshuf defaults for typical music albums)
+if [[ ! -v BKSHUF_PARAM_LINEC ]]; then export BKSHUF_PARAM_LINEC=1000000; fi;
+if [[ ! -v BKSHUF_PARAM_GSIZE ]]; then export BKSHUF_PARAM_GSIZE=10; fi;
# Write the script path ($0) followed by all arguments to stderr.
yell() {
  printf '%s: %s\n' "$0" "$*" >&2
}
# Report all arguments through yell(), then terminate with exit status 111.
die() {
  yell "$*"
  exit 111
}
DEPENDENCIES:
ffprobe
GNU Coreutils 8.30
+
+ ENVIRONMENT VARIABLES
+ BKSHUF_PARAM_LINEC (see `bkshuf` in BK-2020-03)
+ BKSHUF_PARAM_GSIZE (see `bkshuf` in BK-2020-03)
EOF
} # Display information on how to use this script.
check_parsable_audio_ffprobe() {
# arg3: cumulative duration (seconds) of audio files in destination tree
# arg4: cumulative size (bytes) of audio files in destination tree (optional)
# assoc arrays: appRollCall, fileRollCall, dirRollCall
- # env.var: BKSHUF_PARAM_LINEC
- # BKSHUF_PARAM_GSIZE
+ # env.var: BKSHUF_PARAM_LINEC (bkshuf)
+ # BKSHUF_PARAM_GSIZE (bkshuf)
# arrays: music_codecs
# vars: max_filename_length, min_file_duration, max_file_duration,
# min_file_size, max_file_size, siz_dest, max_find_depth
# BK-2020-03: bkshuf v0.1.0
local arg1 arg2 arg3 dur_dest dir_source dir_dest
declare -a list_files # array for files to be considered
- declare -a list_copy_sa # simple array for files to be copied (string: "$dur,$path")
+ declare -a list_copy # array for files to be copied (string: "$dur,$fsize,$path")
# Parse args
arg1="$1";
arg2="$2";
arg3="$3";
arg4="$4";
- if ! ([[ $# -eq 3 ]] || [[ $# -eq 4 ]]); then showUsage; die "ERROR:Invalid number of args:$#"; fi;
-
+ if ! { [[ $# -eq 3 ]] || [[ $# -eq 4 ]]; }; then
+ showUsage;
+ die "ERROR:Invalid number of args:$#"; fi;
+
+ # Check env vars
+ if ! checkInt "$BKSHUF_PARAM_LINEC"; then
+ die "FATAL:Not an int:BKSHUF_PARAM_LINEC:$BKSHUF_PARAM_LINEC"; fi;
+ if ! checkInt "$BKSHUF_PARAM_GSIZE"; then # abort early when this bkshuf env var is not an integer
+ die "FATAL:Not an int:BKSHUF_PARAM_GSIZE:$BKSHUF_PARAM_GSIZE"; fi;
+
## Check duration
if checkInt "$arg3"; then
dur_dest="$arg3";
else
- yell "ERROR:Duration (seconds) not an int:$arg3"
+ die "FATAL:Duration (seconds) not an int:$arg3"
fi;
## Check size
if checkInt "$arg4"; then
siz_dest="$arg4";
else
- yell "ERROR:Size (bytes) not an int:$arg4";
+ die "FATAL:Size (bytes) not an int:$arg4";
fi;
fi;
# Populate list_files array
while read -r line; do
list_files+=("$line");
- done < <(find -L "$dir_source" -maxdepth "$max_find_depth" -type f | sort);
+ done < <(find -L "$dir_source" -maxdepth "$max_find_depth" -type f | \
+ grep -Ev "$ext_ignore" | \
+ sort);
# Test and add random elements of list_files to list_copy
dur=0; # Initialize duration
## Get element count of list_files array
file_count="${#list_files[@]}";
while read -r line && \
- [[ $dur -le $dur_dest ]] && \
- [[ $siz -le $siz_dest ]] && \
+ [[ $dur -le $((dur_dest * 95 / 100)) ]] && \
+ [[ $siz -le $((siz_dest * 95 / 100)) ]] && \
[[ $n -le $file_count ]]; do
- #yell "DEBUG:list_copy building loop:$n";
+ ((n++));
+
+ yell "DEBUG:list_copy building loop:$n/$file_count"; # debug
+ printf "DEBUG:%8d,%8d,%8d/%8d,%8d/%8d\n" "$dur_cand" "$siz_cand" "$dur" "$dur_dest" "$siz" "$siz_dest" 1>&2; # debug, sent to stderr like the other diagnostics; dur_cand/siz_cand still hold the previous candidate's values here
+
path_candfile="$line"; # path of candidate file
+ ### Check size
+ siz_cand="$(du -b "$path_candfile" | awk '{ print $1 }')"; # size in bytes
+ if ! checkInt "$siz_cand"; then continue; fi; # reject
+ if [[ "$((siz + siz_cand))" -gt "$siz_dest" ]]; then continue; fi; # reject
+ if [[ "$siz_cand" -lt "$min_file_size" ]]; then continue; fi; # reject
+ if [[ "$siz_cand" -gt "$max_file_size" ]]; then continue; fi; # reject
+
### Check if has valid codec
if ! check_parsable_audio_ffprobe "$path_candfile"; then continue; fi; # reject
file_format="$(get_audio_format "$path_candfile")";
if ! checkIsInArray "$file_format" "${music_codecs[@]}"; then continue; fi; # reject
- ### Check and save duration
+ ### Check duration
dur_cand="$(get_media_length "$path_candfile")";
dur_cand="${dur_cand%%.*}"; # convert float to int
+ if ! checkInt "$dur_cand"; then continue; fi; # reject
if [[ "$((dur + dur_cand))" -gt "$dur_dest" ]]; then continue; fi; # reject
- dur_cand_wnow="$(printf "%s" "$dur_cand" | wc -m)"; # duration width count
- if [[ $dur_cand_wnow -gt $dur_cand_w ]]; then
- dur_cand_w="$dur_cand_wnow"; fi;
- if ! checkInt "$dur_cand"; then continue; fi; # reject
if [[ "$dur_cand" -lt "$min_file_duration" ]]; then continue; fi; # reject
if [[ "$dur_cand" -gt "$max_file_duration" ]]; then continue; fi; # reject
- ### Check and save size
- siz_cand="$(du -b "$path_candfile" | awk '{ print $1 }')"; # size in bytes
+ ### Update stats digits widths
+ #### duration
+ dur_cand_wnow="$(printf "%s" "$dur_cand" | wc -m)"; # duration width count
+ if [[ $dur_cand_wnow -gt $dur_cand_w ]]; then
+ dur_cand_w="$dur_cand_wnow"; fi;
+ #### size
siz_cand_wnow="$(printf "%s" "$siz_cand" | wc -m)"; # size width count
if [[ $siz_cand_wnow -gt $siz_cand_w ]]; then
siz_cand_w="$siz_cand_wnow"; fi;
- if ! checkInt "$siz_cand"; then continue; fi; # reject
- if [[ "$siz_cand" -lt "$min_file_size" ]]; then continue; fi; # reject
- if [[ "$siz_cand" -gt "$max_file_size" ]]; then continue; fi; # reject
-
+
### Add/update candfile to array:
- ### list_copy_sa (simple array with only paths)
+ ### list_copy (array with "duration, size, path")
#yell "DEBUG:Adding $path_candfile";
- list_copy_sa+=("$dur_cand,$siz_cand,$path_candfile"); # for copying with order
+ printf "DEBUG:%8d,%8d,%s\n" "$dur_cand" "$siz_cand" "$path_candfile" 1>&2;
+ #printf "DEBUG:dur:%s\n" "$dur" 1>&2;
+ #printf "DEBUG:siz:%s\n" "$siz" 1>&2;
+ list_copy+=("$dur_cand,$siz_cand,$path_candfile"); # for copying with order
### Update total duration $dur and total size $siz
dur="$((dur + dur_cand))";
siz="$((siz + siz_cand))";
- #yell "DEBUG:dur:$dur";
- #yell "DEBUG:siz:$siz";
-
- ((n++));
+ yell "DEBUG:dur:$dur";
+ yell "DEBUG:siz:$siz";
done < <(printf "%s\n" "${list_files[@]}" | bkshuf);
+ #yell "DEBUG:BKSHUF_PARAM_LINEC:$BKSHUF_PARAM_LINEC";
+ #yell "DEBUG:BKSHUF_PARAM_GSIZE:$BKSHUF_PARAM_GSIZE";
+
n=0; # Initialize loop counter
- num_w="$(printf "%s" "${#list_copy_sa[@]}" | wc -m)"; # init file number format
+ num_w="$(printf "%s" "${#list_copy[@]}" | wc -m)"; # init file number format
num_fmt="%0""$num_w""d";
path_log_output="$dir_dest"/COPY.log;
printf "num,fingerprint,duration,size,original_path\n" >> "$path_log_output";
# Copy files in list_copy to dir_dest;
while read -r line; do
- yell "DEBUG:line:$line"; # debug
+ #yell "DEBUG:line:$line"; # debug
fdur="$(printf "%s" "$line" | cut -d',' -f1)";
fsize="$(printf "%s" "$line" | cut -d',' -f2)";
fpath="$(printf "%s" "$line" | cut -d',' -f3-)";
## Get basename of path
file_basename="$(basename "$fpath")";
+ ### Get basename without unprintable non-ASCII characters
+ file_basename_compat="$(printf "%s" "$file_basename" | tr -dc '[:graph:][:space:]' )";
## Get 8-hex-character (32-bit) b2sum fingerprint (for different files that share basename)
fingerprint="$(b2sum -l32 "$fpath" | awk '{print $1}' )";
## Form output filename
num="$(printf "$num_fmt" "$n")";
- file_name="$num"_"$fingerprint".."$file_basename";
+ file_name="$num"_"$fingerprint".."$file_basename_compat";
file_name="${file_name:0:$max_filename_length}"; # Limit filename length (e.g. Windows has max of 255 characters)
## Form output path
path_output="$dir_dest"/"$file_name";
## Copy
- must cp "$fpath" "$path_output" && yell "NOTICE:Copied ($fdur seconds): $fpath ";
+ must cp "$fpath" "$path_output" && yell "NOTICE:Copied ($(printf "%""$dur_cand_w"d "$fdur") seconds): $fpath ";
#yell "DEBUG:Copied $file_basename to $dur_dest.";
## Append log
fpath_can="$(readlink -f "$fpath")"; # resolve symlinks to canonical path
log_fmt="%s,%s,%""$dur_cand_w""d,%""$siz_cand_w""d,%s\n"; # e.g. "%s,%s,%3d,%5d,%s\n" if dur_cand_w=3 and siz_cand_w=5
- #yell "DEBUG:log_fmt:$log_fmt"; sleep 10; # debug
printf "$log_fmt" "$num" "$fingerprint" "$fdur" "$fsize" "$fpath_can" >> "$path_log_output";
((n++));
- unset file_basename path_output
- done < <(printf "%s\n" "${list_copy_sa[@]}");
+ unset file_basename file_basename_compat path_output;
+ done < <(printf "%s\n" "${list_copy[@]}");
# Report total duration and size
yell "NOTICE:Total duration (seconds):$dur";
# Author: Steven Baltakatei Sandoval
# License: GPLv3+
+
+# bkshuf v0.1.0
+# Author: Steven Baltakatei Sandoval
+# License: GPLv3+
+# URL: https://gitlab.com/baltakatei/baltakatei-exdev/-/blob/b9e8b771e985fcdf26ba8b9ccb8e31b62da757d3/unitproc/bkshuf