#!/usr/bin/env bash
# Desc: Copies random audio files
# Usage: bk-copy-rand-music [dir SOURCE] [dir DEST] [int DURATION] ([int BYTES])
# Version: 0.1.0
# Depends: BK-2020-03: bkshuf v0.1.0
# Origin: BK-2020-03.git, user/bk-copy-rand-music (derived from blob 18f95f3)

declare -Ag appRollCall # Associative array for storing app status
declare -Ag fileRollCall # Associative array for storing file status
declare -Ag dirRollCall # Associative array for storing dir status
declare -a music_codecs # Array for storing valid codec names (e.g. "aac" "mp3")

# Adjustable parameters
music_codecs=("vorbis" "aac" "mp3" "flac" "opus"); # whitelist of valid codec_names ffprobe might return
max_filename_length="255"; # max output filename length (characters)
min_file_duration="10"; # minimum duration per music file (seconds)
max_file_duration="3600"; # maximum duration per music file (seconds)
min_file_size="100000"; # minimum size per music file (bytes)
max_file_size="100000000"; # maximum size per music file (bytes)
siz_dest="600000000"; # default destination size limit: 600 MB
max_find_depth="10"; # max find depth


yell() { printf '%s: %s\n' "$0" "$*" >&2; } # print script path and all args to stderr
die() { yell "$*"; exit 111; } # same as yell() but exits with status 111
must() { "$@" || die "cannot $*"; } # runs args as command, reports args if command fails
checkapp() {
    # Desc: If arg is a command, save result in assoc array 'appRollCall'
    # Usage: checkapp arg1 arg2 arg3 ...
    # Version: 0.1.2
    # Input: global assoc. array 'appRollCall'
    # Output: adds/updates key(value) to global assoc array 'appRollCall';
    # Output: returns 0 if every arg is an available command; 1 otherwise
    #         (also 1 when called without args)
    # Depends: bash 5.0.3
    local app returnState

    #===Process Args===
    for app in "$@"; do
        if [[ -z $app ]]; then
            # An empty key is not a valid subscript; record a placeholder instead
            appRollCall["(Unspecified Appname(s))"]="false"; returnState="false";
        elif command -v "$app" 1>/dev/null 2>&1; then # Check if arg is a valid command
            appRollCall["$app"]="true";
            if [[ $returnState != "false" ]]; then returnState="true"; fi;
        else
            appRollCall["$app"]="false"; returnState="false";
        fi;
    done;

    #===Determine function return code===
    [[ $returnState == "true" ]];
} # Check that app exists
checkfile() {
    # Desc: If arg is a file path, save result in assoc array 'fileRollCall'
    # Usage: checkfile arg1 arg2 arg3 ...
    # Version: 0.1.2
    # Input: global assoc. array 'fileRollCall'
    # Output: adds/updates key(value) to global assoc array 'fileRollCall';
    # Output: returns 0 if every arg is a regular file; 1 otherwise
    #         (also 1 when called without args)
    # Depends: bash 5.0.3
    local path returnState

    #===Process Args===
    for path in "$@"; do
        if [[ -z $path ]]; then
            # An empty key is not a valid subscript; record a placeholder instead
            fileRollCall["(Unspecified Filename(s))"]="false"; returnState="false";
        elif [[ -f $path ]]; then
            fileRollCall["$path"]="true";
            if [[ $returnState != "false" ]]; then returnState="true"; fi;
        else
            fileRollCall["$path"]="false"; returnState="false";
        fi;
    done;

    #===Determine function return code===
    [[ $returnState == "true" ]];
} # Check that file exists
checkdir() {
    # Desc: If arg is a dir path, save result in assoc array 'dirRollCall'
    # Usage: checkdir arg1 arg2 arg3 ...
    # Version 0.1.3
    # Input: global assoc. array 'dirRollCall'
    # Output: adds/updates key(value) to global assoc array 'dirRollCall';
    # Output: returns 0 if all args are dirs; 1 otherwise
    #         (also 1 when called without args)
    # Depends: Bash 5.0.3
    local path returnState

    #===Process Args===
    for path in "$@"; do
        if [[ -z $path ]]; then
            # An empty key is not a valid subscript; record a placeholder instead
            dirRollCall["(Unspecified Dirname(s))"]="false"; returnState="false";
        elif [[ -d $path ]]; then
            dirRollCall["$path"]="true";
            if [[ $returnState != "false" ]]; then returnState="true"; fi;
        else
            dirRollCall["$path"]="false"; returnState="false";
        fi;
    done;

    #===Determine function return code===
    [[ $returnState == "true" ]];
} # Check that dir exists
_displayMissingGroup() {
    # Desc: Prints one "Missing ..." line for a single roll call array
    # Usage: _displayMissingGroup label rollCallName
    # Input: arg1: line prefix (e.g. "Missing apps :")
    #        arg2: name of an associative array whose values are "true"/"false"
    # Output: stderr: prefix followed by every key whose value is "false"
    #                 (each key followed by one space); nothing if none
    # Output: returns 0 if at least one key is marked "false"; 1 otherwise
    # Depends: bash 4.3+ (nameref)
    local report key anyMissing
    local -n _rollCallRef="$2";

    report="$1";
    anyMissing="false";
    for key in "${!_rollCallRef[@]}"; do
        if [[ ${_rollCallRef[$key]} == "false" ]]; then
            report+="$key ";
            anyMissing="true";
        fi;
    done;

    if [[ $anyMissing == "true" ]]; then
        printf '%s\n' "$report" 1>&2;
        return 0;
    fi;
    return 1;
} # Report missing entries of one roll call array
displayMissing() {
    # Desc: Displays missing apps, files, and dirs
    # Usage: displayMissing
    # Version 1.1.0
    # Input: associative arrays: appRollCall, fileRollCall, dirRollCall
    # Output: stderr: messages indicating missing apps, file, or dirs
    # Output: returns exit code 0 if nothing missing; 1 otherwise
    # Depends: bash 5, _displayMissingGroup()
    local anyMissing

    anyMissing="false";
    # Every group is always evaluated so that all problems are shown in one run.
    if _displayMissingGroup "Missing apps :" appRollCall; then anyMissing="true"; fi;
    if _displayMissingGroup "Missing files:" fileRollCall; then anyMissing="true"; fi;
    if _displayMissingGroup "Missing dirs:" dirRollCall; then anyMissing="true"; fi;

    #==Determine function return code===
    if [[ $anyMissing == "true" ]]; then
        return 1;
    fi;
    return 0;
} # Display missing apps, files, dirs
showUsage() {
    # Desc: Display script usage information
    # Usage: showUsage
    # Version 0.0.1
    # Input: none
    # Output: stdout
    # Depends: GNU-coreutils 8.30 (cat)
    cat <<'EOF'

    DESCRIPTION:
        This script may be used to copy a random selection of files containing
        audio tracks from SOURCE to DEST.

    USAGE:
        bk-copy-rand-music [dir SOURCE] [dir DEST] [int DURATION] (int BYTES)

    EXAMPLE:
        bk-copy-rand-music ~/Music /tmp/music-sample 3600
        bk-copy-rand-music ~/Music /tmp/music-sample 3600 680000000

    DEPENDENCIES:
        ffprobe
        GNU Coreutils 8.30
EOF
} # Display information on how to use this script.
_ffprobe_audio_codec_names() {
    # Desc: Runs the ffprobe query shared by the audio codec checks
    # Usage: _ffprobe_audio_codec_names [path FILE]
    # Input: arg1: file path
    # Output: stdout: ffprobe's codec_name output for the audio streams
    #                 of FILE (may be empty)
    #         exit code: that of ffprobe
    # Depends: ffprobe
    # Note: stdin is detached in case ffprobe reads it; callers run inside
    #       'while read' loops whose input must not be consumed.
    ffprobe -v error -select_streams a -show_entries stream=codec_name \
            -of default=nokey=1:noprint_wrappers=1 "$1" 2>/dev/null </dev/null;
} # Print audio codec name(s) reported by ffprobe
check_parsable_audio_ffprobe() {
    # Desc: Checks if ffprobe returns valid audio codec name for file
    # Usage: check_parsable_audio_ffprobe [path FILE]
    # Version: 0.0.2
    # Input: arg1: file path
    # Output: exit code 0 if ffprobe succeeds and prints a codec name;
    #         1 otherwise
    # Depends: ffprobe, die(), _ffprobe_audio_codec_names()
    local file_in ffprobe_out

    if [[ $# -ne 1 ]]; then die "ERROR:Invalid number of args:$#"; fi;

    file_in="$1";

    # Fail if ffprobe itself exits on error
    if ! ffprobe_out="$(_ffprobe_audio_codec_names "$file_in")"; then
        return 1;
    fi;

    # Fail if ffprobe returns no result
    [[ -n $ffprobe_out ]];
} # Checks if file has valid codec name using ffprobe
get_audio_format() {
    # Desc: Gets audio format of file as string
    # Usage: get_audio_format arg1
    # Depends: ffprobe, _ffprobe_audio_codec_names()
    # Version: 0.0.2
    # Input: arg1: input file path
    # Output: stdout (if valid audio format)
    #         exit code 0 if audio file; 1 otherwise
    # Example: get_audio_format myvideo.mp4
    #   Note: Would return "opus" if full ffprobe report had 'Audio: opus, 48000 Hz, stereo, fltp'
    #   Note: Not tested with videos containing multiple video streams
    #   Note: Output that is not a single alphanumeric word (e.g. several
    #         lines) is rejected by the pattern check below.
    # Ref/Attrib: [1] https://stackoverflow.com/questions/5618363/is-there-a-way-to-use-ffmpeg-to-determine-the-encoding-of-a-file-before-transcod
    #             [2] https://stackoverflow.com/questions/44123532/how-to-find-out-the-file-extension-for-extracting-audio-tracks-with-ffmpeg-and-p#comment88464070_50723126
    local audio_format file_in pattern
    file_in="$1";

    # Return error exit code if ffprobe itself exited on error; see [1]
    if ! audio_format="$(_ffprobe_audio_codec_names "$file_in")"; then
        return 1;
    fi;

    ## Return error if audio format is incorrectly formatted (e.g. reject if contains spaces)
    pattern='^[[:alnum:]]+$'; # alphanumeric string with no spaces
    if [[ ! $audio_format =~ $pattern ]]; then
        return 1;
    fi;

    # Report audio format
    printf '%s\n' "$audio_format";
    return 0;
} # Get audio format as stdout
get_media_length() {
    # Use ffprobe to get media container length in seconds (float)
    # Usage: get_media_length arg1
    # Input:  arg1: path to file
    # Output: stdout: seconds (float), as printed by ffprobe
    # Depends: ffprobe 4.1.8, die()
    # Ref/Attrib: [1] How to get video duration in seconds? https://superuser.com/a/945604
    local file_in
    file_in="$1";
    if [[ ! -f $file_in ]]; then
        die "ERROR:Not a file:$file_in";
    fi;
    # stdin detached in case ffprobe reads it (callers run inside 'while read' loops)
    ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 "$file_in" </dev/null;
} # Get media container length in seconds via stdout
checkInt() {
    # Desc: Checks if arg is integer
    # Usage: checkInt arg
    # Input: arg: integer
    # Output: - return code 0 (if arg is a string of decimal digits)
    #         - return code 1 (if arg is not; includes empty string and
    #           signed values)
    # Example: if ! checkInt $arg; then echo "not int"; fi;
    # Version: 0.0.2
    # Depends: die()
    local int_regex

    #===Process Arg===
    if [[ $# -ne 1 ]]; then
        die "ERROR:Invalid number of arguments:$#";
    fi;

    int_regex='^[0-9]+$'; # Regular Expression to test
    [[ $1 =~ $int_regex ]];
} # Checks if arg is integer
checkIsInArray() {
    # Desc: Checks if input arg is element in array
    # Usage: checkIsInArray arg1 arg2
    # Version: 0.0.2
    # Input: arg1: test string
    #        arg2: array
    # Output: exit code 0 if test string is in array; 1 otherwise
    # Example: checkIsInArray "foo" "${myArray[@]}"
    # Note: Comparison is an exact string match. A prefix test would let an
    #       empty or partial test string match every element.
    # Ref/Attrib: [1] How do I check if variable is an array? https://stackoverflow.com/a/27254437
    #             [2] How to pass an array as function argument? https://askubuntu.com/a/674347
    local string_test element

    if [[ $# -lt 1 ]]; then return 1; fi;
    string_test="$1";
    shift; # remaining positional parameters are the array elements; see [2]

    for element in "$@"; do
        if [[ $element == "$string_test" ]]; then
            return 0;
        fi;
    done;
    return 1;
} # Check if string is element in array
main() {
    # Desc: Main program
    # Input: arg1: path to source tree
    #        arg2: path to destination tree
    #        arg3: cumulative duration (seconds) of audio files in destination tree
    #        arg4: cumulative size (bytes) of audio files in destination tree (optional)
    #        assoc arrays: appRollCall, fileRollCall, dirRollCall
    #        env.var: BKSHUF_PARAM_LINEC
    #                 BKSHUF_PARAM_GSIZE
    #                 (not read here; presumably consumed by bkshuf - verify)
    #        arrays:  music_codecs
    #        vars:    max_filename_length, min_file_duration, max_file_duration,
    #                 min_file_size, max_file_size, siz_dest, max_find_depth
    # Output: files copied into arg2; rows appended to arg2/COPY.log;
    #         status messages on stderr
    #         exits 111 (via die) on invalid arguments or missing resources
    # Depends: yell(), die(), must(), checkInt(), checkapp(), checkdir() 0.1.2,
    #          displayMissing() 1.0.0, showUsage(), checkIsInArray(),
    #          check_parsable_audio_ffprobe(), get_audio_format(),
    #          get_media_length(), GNU Coreutils 8.30
    #          BK-2020-03: bkshuf v0.1.0
    local arg1 arg2 arg3 arg4 args_ok dur_dest size_limit dir_source dir_dest
    local path_candfile file_format dur_cand siz_cand
    local dur siz n file_count dur_cand_w siz_cand_w
    local num_w num_fmt log_fmt path_log_output
    local entry rest fdur fsize fpath fpath_can
    local file_basename fingerprint num file_name path_output
    local -a list_files=()   # array for files to be considered
    local -a list_copy_sa=() # simple array for files to be copied (string: "$dur,$siz,$path")

    # Parse args
    if [[ $# -ne 3 && $# -ne 4 ]]; then
        showUsage;
        die "ERROR:Invalid number of args:$#";
    fi;
    arg1="$1";
    arg2="$2";
    arg3="$3";
    arg4="${4:-}";
    args_ok="true";
    size_limit="$siz_dest"; # default unless arg4 overrides it

    ## Check duration
    if checkInt "$arg3"; then
        dur_dest="$((10#$arg3))"; # force base 10 so "0100" is not read as octal
    else
        yell "ERROR:Duration (seconds) not an int:$arg3";
        args_ok="false";
    fi;

    ## Check size
    if [[ -n $arg4 ]]; then
        if checkInt "$arg4"; then
            size_limit="$((10#$arg4))";
        else
            yell "ERROR:Size (bytes) not an int:$arg4";
            args_ok="false";
        fi;
    fi;

    ## Check directories
    if checkdir "$arg1" "$arg2"; then
        dir_source="$arg1";
        dir_dest="$arg2";
    else
        yell "ERROR:Directory error";
    fi;

    ## Check apps
    checkapp ffprobe bkshuf;

    if ! displayMissing; then
        showUsage;
        die "ERROR:Check missing resources.";
    fi;
    ## Invalid numbers are fatal; continuing would compare against an empty limit
    if [[ $args_ok != "true" ]]; then
        showUsage;
        die "ERROR:Invalid argument(s).";
    fi;

    yell "STATUS:Working...";

    # Populate list_files array (one path per line; whitespace preserved)
    mapfile -t list_files < <(find -L "$dir_source" -maxdepth "$max_find_depth" -type f | sort);

    # Test and add random elements of list_files to list_copy
    dur=0; # Initialize duration
    siz=0; # Initialize size
    n=0;   # Initialize counter of accepted files
    dur_cand_w=1; # Init duration digit width counter
    siz_cand_w=1; # Init size digit width counter
    ## Get element count of list_files array
    file_count="${#list_files[@]}";
    # NOTE(review): the duration cap is enforced per candidate below, while
    # the size cap is only tested here after a file has been accepted, so the
    # total size may exceed the limit by up to one file.
    while IFS= read -r path_candfile \
            && (( dur <= dur_dest )) \
            && (( siz <= size_limit )) \
            && (( n <= file_count )); do
        ### Skip blank lines (printf emits one when list_files is empty)
        if [[ -z $path_candfile ]]; then continue; fi;

        ### Check if has valid codec
        if ! check_parsable_audio_ffprobe "$path_candfile"; then continue; fi; # reject

        ### Check if desired codec
        if ! file_format="$(get_audio_format "$path_candfile")"; then continue; fi; # reject
        if ! checkIsInArray "$file_format" "${music_codecs[@]}"; then continue; fi; # reject

        ### Check and save duration
        dur_cand="$(get_media_length "$path_candfile")";
        dur_cand="${dur_cand%%.*}"; # convert float to int
        #### Validate before any arithmetic: ffprobe may print a non-number
        if ! checkInt "$dur_cand"; then continue; fi; # reject
        dur_cand="$((10#$dur_cand))";
        if (( dur_cand < min_file_duration )); then continue; fi; # reject
        if (( dur_cand > max_file_duration )); then continue; fi; # reject
        if (( dur + dur_cand > dur_dest )); then continue; fi; # reject

        ### Check and save size
        siz_cand="$(du -b -- "$path_candfile" | awk '{ print $1 }')"; # size in bytes
        if ! checkInt "$siz_cand"; then continue; fi; # reject
        if (( siz_cand < min_file_size )); then continue; fi; # reject
        if (( siz_cand > max_file_size )); then continue; fi; # reject

        ### Track widest accepted values for log column alignment
        if (( ${#dur_cand} > dur_cand_w )); then dur_cand_w="${#dur_cand}"; fi;
        if (( ${#siz_cand} > siz_cand_w )); then siz_cand_w="${#siz_cand}"; fi;

        ### Add candfile to array list_copy_sa ("$dur,$siz,$path")
        list_copy_sa+=("$dur_cand,$siz_cand,$path_candfile"); # for copying with order

        ### Update total duration $dur and total size $siz
        dur="$((dur + dur_cand))";
        siz="$((siz + siz_cand))";

        n="$((n + 1))";
    done < <(printf "%s\n" "${list_files[@]}" | bkshuf);

    n=0; # Initialize loop counter
    num_w="${#list_copy_sa[@]}";
    num_w="${#num_w}"; # digit count of file total, for zero padding
    num_fmt="%0""$num_w""d";
    log_fmt="%s,%s,%""$dur_cand_w""d,%""$siz_cand_w""d,%s\n"; # e.g. "%s,%s,%3d,%5d,%s\n" if dur_cand_w=3 and siz_cand_w=5
    path_log_output="$dir_dest"/COPY.log;
    printf "num,fingerprint,duration,size,original_path\n" >> "$path_log_output";
    # Copy files in list_copy_sa to dir_dest;
    # iterating the array directly does nothing when no file was selected.
    for entry in "${list_copy_sa[@]}"; do
        yell "DEBUG:line:$entry"; # debug
        ## Split "$dur,$siz,$path"; the path itself may contain commas
        fdur="${entry%%,*}";
        rest="${entry#*,}";
        fsize="${rest%%,*}";
        fpath="${rest#*,}";
        ## Get basename of path
        file_basename="$(basename -- "$fpath")";

        ## Get 8-character (32-bit) b2sum fingerprint (for different files that share basename)
        fingerprint="$(b2sum -l32 -- "$fpath" | awk '{print $1}' )";

        ## Form output filename
        printf -v num "$num_fmt" "$n";
        file_name="$num"_"$fingerprint".."$file_basename";
        file_name="${file_name:0:$max_filename_length}"; # Limit filename length (e.g. Windows has max of 255 characters)

        ## Form output path
        path_output="$dir_dest"/"$file_name";

        ## Copy
        must cp -- "$fpath" "$path_output" && yell "NOTICE:Copied ($fdur seconds): $fpath ";

        ## Append log
        fpath_can="$(readlink -f -- "$fpath")"; # resolve symlinks to canonical path
        printf "$log_fmt" "$num" "$fingerprint" "$fdur" "$fsize" "$fpath_can" >> "$path_log_output";

        n="$((n + 1))";
    done;

    # Report total duration and size
    yell "NOTICE:Total duration (seconds):$dur";
    yell "NOTICE:Total size (bytes):$siz";

} # Main program

main "$@";

# Author: Steven Baltakatei Sandoval
# License: GPLv3+