From: Steven Baltakatei Sandoval Date: Tue, 14 Feb 2023 13:24:38 +0000 (+0000) Subject: feat(user/bkfeh):Use unique names for sample files X-Git-Tag: 0.8.1~11 X-Git-Url: https://zdv2.bktei.com/gitweb/BK-2020-03.git/commitdiff_plain/82fabd0214119cbc81c09edc2ba64418b77e00eb?ds=sidebyside;hp=-c feat(user/bkfeh):Use unique names for sample files - Note: Use both file order and file b2sum 32-bit hashes --- 82fabd0214119cbc81c09edc2ba64418b77e00eb diff --git a/user/bkfeh b/user/bkfeh index 0f8154a..911d51b 100755 --- a/user/bkfeh +++ b/user/bkfeh @@ -1,8 +1,8 @@ #!/usr/bin/env bash # Desc: Wrapper for feh that accepts directory paths via posargs or stdin lines. -# Version: 0.2.0 +# Version: 0.3.0 # Ref/Attrib: [1] Tange, Ole. GNU Parallel with Bash Array. 2019-03-24. https://unix.stackexchange.com/a/508365/411854 -# Depends: GNU Parallel, GNU Bash v5.1.16, feh 3.6.3 +# Depends: GNU Parallel, GNU Bash v5.1.16, feh 3.6.3, GNU Coreutils 8.32 (b2sum) #===Declare local functions=== yell() { echo "$0: $*" >&2; } # print script path and all args to stderr @@ -153,7 +153,10 @@ displayMissing() { #==END Display errors== } # Display missing apps, files, dirs check_depends() { - if ! checkapp feh parallel; then displayMissing; die "FATAL:Missing apps."; fi; + if ! checkapp feh parallel bkshuf b2sum; then + displayMissing; + die "FATAL:Missing apps."; + fi; return 1; }; # check dependencies checkInt() { @@ -249,7 +252,8 @@ save_sample() { # Input: arg1 list_paths (list of files to take samples from) # envvar BKFEH_SAMPLE_DIR (environment variable set outside of this script) # envvar BKFEH_SAMPLE_SIZE (space limit for sample dir files) - # Depends: yell(), GNU Parallel, GNU find, GNU Coreutils 8.32 (cut, find, du) + # Depends: GNU Parallel, GNU find, GNU Coreutils 8.32 (cut, find, du) + # BK-2020-03: bkshuf (0.0.1), yell() local list_paths sample_count="100"; # max number of images to put in sample dir sample_max_space="10000000"; # max bytes to put in sample dir @@ -263,33 +267,55 @@ save_sample() { sample_count="$BKFEH_SAMPLE_COUNT"; fi; - if [[ ! -z "$1" ]]; then + if [[ -n "$1" ]]; then list_paths="$1"; # newline-delimited list of file paths to sample from else yell "ERROR:NO paths available to sample."; fi; if [[ -d "$BKFEH_SAMPLE_DIR" ]]; then - sample_dir="$BKFEH_SAMPLE_DIR"; + #sample_dir="$BKFEH_SAMPLE_DIR"; yell "STATUS:Environment variable BKFEH_SAMPLE_DIR set. Clearing and saving samples..."; ## clear previous sample - count_samples="$(find $BKFEH_SAMPLE_DIR -maxdepth 1 -type f | wc -l)"; + count_prev_samples="$(find "$BKFEH_SAMPLE_DIR" -maxdepth 1 -type f | wc -l)"; + yell "STATUS:Deleting $count_prev_samples previous samples..."; find "$BKFEH_SAMPLE_DIR" -maxdepth 1 -type f -exec rm '{}' \; ; ## save random sample yell "STATUS:Saving random sample of size $sample_count to $BKFEH_SAMPLE_DIR..."; - list_paths_sample="$(echo "$list_paths" | shuf | head -n"$sample_count")"; + list_paths_sample="$(echo "$list_paths" | bkshuf "$sample_count" | head -n"$sample_count")"; + n_samp=0; # init sample file counter + sample_log="$BKFEH_SAMPLE_DIR"/paths.txt; + printf "%s,%s,%s\n" "n_samp" "file_hash" "file_path" >> "$sample_log"; while read -r line; do if [[ -z "$line" ]]; then continue; fi; ### check size limit sample_act_space="$(du -bd1 "$BKFEH_SAMPLE_DIR" | cut -f1 )"; # actual used space cand_space="$(du -bd1 "$line" | cut -f1 )"; # size of candidate file to add sample_req_space="$((sample_act_space + cand_space))"; + + ### Customize file names + n_samp_w="$(printf "%s" "$sample_count" | wc -c)"; + n_samp_fmt="%0""$n_samp_w""d"; + n_samp_dd="$(printf "$n_samp_fmt" "$n_samp")"; # sample number fixed-width + file_path="$line"; + #file_dir="$(dirname "$line")"; + file_name="$(basename "$line")"; + file_hash="$(b2sum -l32 "$line" | awk '{print $1}')"; # use file hash to avoid clobbering + file_ext="${file_name##*.}"; + file_name="${file_name%.*}"; + file_shortname="${file_name:0:32}"; + file_name_new="$n_samp_dd"_"$file_hash".."$file_shortname"."$file_ext"; + file_path_new="$BKFEH_SAMPLE_DIR"/"$file_name_new" if [[ "$sample_req_space" -lt "$sample_max_space" ]]; then #### add file to sample dir - cp -n "$line" "$BKFEH_SAMPLE_DIR" ; + must cp -n "$file_path" "$file_path_new"; + #### note path in sample dir log + printf "%s,%s,%s\n" "$n_samp_dd" "$file_hash" "$file_path" \ + >> "$sample_log"; fi; + ((n_samp++)); done < <( echo "$list_paths_sample" ); else yell "ERROR:Does not exist: $BKFEH_SAMPLE_DIR";