+save_sample() {
+    # Desc: Replace the regular files in BKFEH_SAMPLE_DIR with a random sample
+    #   of the listed files, limited by file count and by total size.
+    # Usage: save_sample arg1
+    # Input: arg1    list_paths (newline-delimited list of file paths to sample from)
+    #        envvar  BKFEH_SAMPLE_DIR (existing directory that receives the
+    #                sample; set outside of this script)
+    # Output: copies of sampled files in BKFEH_SAMPLE_DIR; progress and errors
+    #   are reported through yell()
+    # Return: 0 if BKFEH_SAMPLE_DIR is unset (sampling disabled) or sampling ran
+    #         1 if no paths were given or BKFEH_SAMPLE_DIR is not a directory
+    # Depends: yell(), GNU find, GNU Coreutils 8.32 (shuf, du, cut, cp, rm)
+    local list_paths list_paths_sample sample_dir line;
+    local sample_act_space cand_space;
+    local sample_count="100";             # max number of files to sample
+    local sample_max_space="100000000";   # max size of sample dir, in bytes
+
+    # Sampling is optional: do nothing unless the environment variable is set.
+    if [[ ! -v BKFEH_SAMPLE_DIR ]]; then return 0; fi;
+
+    list_paths="${1:-}";
+    if [[ -z "$list_paths" ]]; then
+        # Stop here so the previous sample is not deleted for nothing.
+        yell "ERROR:NO paths available to sample.";
+        return 1;
+    fi;
+
+    sample_dir="$BKFEH_SAMPLE_DIR";
+    if [[ ! -d "$sample_dir" ]]; then
+        yell "ERROR:Does not exist: $sample_dir";
+        return 1;
+    fi;
+    yell "STATUS:Environment variable BKFEH_SAMPLE_DIR set. Clearing and saving samples...";
+
+    ## clear previous sample (top-level regular files only)
+    find "$sample_dir" -maxdepth 1 -type f -exec rm -- '{}' + ;
+
+    ## save random sample
+    yell "STATUS:Saving random sample of size $sample_count to $sample_dir...";
+    list_paths_sample="$(printf '%s\n' "$list_paths" | shuf -n "$sample_count")";
+    # IFS= keeps leading/trailing whitespace that may be part of a file name.
+    while IFS= read -r line; do
+        # Skip blank entries and anything that is not a regular file; du on a
+        # missing path or a directory would not yield a single usable number.
+        if [[ ! -f "$line" ]]; then continue; fi;
+
+        ### check size limit
+        # -s prints one summary line even if the sample dir has subdirectories.
+        sample_act_space="$(du -sb -- "$sample_dir" | cut -f1)"; # actual used space
+        cand_space="$(du -sb -- "$line" | cut -f1)";             # size of candidate file
+        # An empty value means du failed; skip rather than break the arithmetic.
+        if [[ -z "$sample_act_space" || -z "$cand_space" ]]; then continue; fi;
+
+        if (( sample_act_space + cand_space < sample_max_space )); then
+            #### add file to sample dir (-n: never overwrite an existing name)
+            cp -n -- "$line" "$sample_dir";
+        fi;
+    done < <(printf '%s\n' "$list_paths_sample");
+}; # save sample of files
+