#!/bin/bash
# Desc: Outputs text at randomish position within dir of text files
# Usage: randtxt.sh DIR
# Version 0.0.4
# Depends: Bash 5.1.16, GNU findutils 4.8.0, GNU Coreutils 8.32
#
# Provenance (recovered from a gitweb patch of user/randtxt.sh):
#   X-Git-Url: https://zdv2.bktei.com/gitweb/BK-2020-03.git/blobdiff_plain/f943b9f29ad49237b05278807f86ae93d55655a4..43710ad18a71e0168ccec082180bae98a23cb529:/user/randtxt.sh
#
# How it works:
#   1. List up to SAMPLE randomly chosen `*.txt` files under DIR with sizes.
#   2. Pick a random byte offset within the concatenated sizes, so larger
#      files are proportionally more likely to be chosen.
#   3. Print the bytes surrounding that offset within the selected file.

SAMPLE=10000;   # maximum number of files considered per run
CONTEXT="5000"; # selection spans CONTEXT/2 bytes before and after the chosen
                # byte, i.e. up to CONTEXT+1 bytes including the byte itself
# Force CONTEXT to be even so that both halves are the same size.
if (( CONTEXT % 2 )); then ((CONTEXT++)); fi;


yell() { echo "$0: $*" >&2; } # print script path and all args to stderr
die() { yell "$*"; exit 111; } # same as yell() but exits with status 111
must() { "$@" || die "cannot $*"; } # runs args as command, reports args if command fails
showUsage() {
    # Desc: Display information on how to use this script.
    # output: stdout usage text
    cat <<'EOF'
    USAGE:
        randtxt.sh DIR

    EXAMPLE:
        randtxt.sh ~/
EOF
};
checkInput() {
    # Desc: Validate the command line; exits via die() on any problem.
    # input: arg1 directory path (exactly one argument is required)
    # output: stderr usage text and error message on failure
    # return: 0 when arg1 is an existing directory

    if [[ $# -ne 1 ]]; then
        # Usage goes to stderr so stdout stays reserved for sample text.
        showUsage >&2;
        if [[ $# -gt 1 ]]; then die "FATAL:Too many arguments"; fi;
        die "FATAL:Not enough arguments";
    fi;
    if [[ ! -d "$1" ]]; then die "FATAL:Not a dir:$1"; fi;
    return 0;
};
getFileSizeList() {
    # Desc: Create list of file sizes and paths
    # input: arg1 directory path
    # output: array fileSizeList  (elements are "SIZE<TAB>PATH")
    #         var   totalSize     (sum of all listed sizes, in bytes)
    #         var   randPoint     (random integer in 0..totalSize-1)

    local dir="$1";
    # GNU find treats any argument starting with '-' as an expression, even
    # after `--`; anchor such a start point with `./` so it is read as a path.
    if [[ $dir == -* ]]; then dir="./$dir"; fi;

    declare -g -a fileSizeList;
    mapfile -d '' -t fileSizeList < <(
        find -- "$dir" -type f -name "*.txt" -printf '%s\t%p\0' | shuf -z -n"$SAMPLE";
    ); # Build array by feeding null-delimited records from `find` to `mapfile`

    declare -g totalSize=0;
    local entry size;
    for entry in "${fileSizeList[@]}"; do
        size="${entry%%$'\t'*}"; # text before the first tab is the byte size
        totalSize=$((totalSize + size));
    done;
    if [[ $totalSize -le 0 ]]; then die "FATAL:Total size is zero."; fi;

    # Declared separately from the assignment so the exit status seen by `||`
    # is that of `shuf`, not of `declare`.
    declare -g randPoint;
    randPoint="$(shuf -n1 -i0-$((totalSize-1)); )" \
        || die "FATAL:Could not pick random point.";
    # An empty or non-numeric value would compare as 0 later on and silently
    # bias the selection toward the first file, so reject it here.
    if [[ ! $randPoint =~ ^[0-9]+$ ]]; then
        die "FATAL:Invalid random point:$randPoint";
    fi;
};
getRandText() {
    # Desc: Print text within fileSizeList around randPoint
    # input: array fileSizeList
    #        var   randPoint
    #        var   CONTEXT
    # output: stdout "INFO:Sample of:PATH", the selected bytes, then a newline
    # return: 0 after printing a sample; 1 if no listed file contains randPoint

    local sum psum entry size filePoint fileEnd selStart selEnd selCount file;
    sum=0;  # running byte total including the current file
    psum=0; # running byte total of all files before the current one
    for entry in "${fileSizeList[@]}"; do
        # Move search to next file
        size="${entry%%$'\t'*}";
        sum=$(( sum + size ));

        # Check if point is within file
        if [[ $sum -gt $randPoint ]]; then
            # Calculate important positions within file (zero-indexed)
            filePoint=$(( randPoint - psum ));       # point as byte within file
            fileEnd=$(( size - 1 ));                 # last byte within file
            selStart=$(( filePoint - CONTEXT/2 ));   # start of output selection
            selEnd=$(( filePoint + CONTEXT/2 ));     # end of output selection
            # Clamp selection start and end indexes to the file bounds
            if [[ $selStart -lt 0 ]]; then selStart=0; fi;
            if [[ $selEnd -gt $fileEnd ]]; then selEnd=$fileEnd; fi;
            selCount=$(( selEnd - selStart + 1 ));   # number of bytes within selection

            # Everything after the first tab is the path. Parameter expansion
            # keeps the path byte-exact (a command substitution would strip
            # trailing newlines) and avoids spawning `cut`.
            file="${entry#*$'\t'}";

            # Output context
            printf "INFO:Sample of:%s\n" "$file";
            # `tail --bytes=+N` is one-indexed, hence selStart+1.
            tail --bytes=+$((selStart + 1)) -- "$file" | head --bytes="$selCount";
            printf "\n";
            return 0;
        fi;
        psum=$sum; # store previous sum
    done;
    return 1; # not reached while 0 <= randPoint < totalSize
};
export -f checkInput getFileSizeList getRandText;

main() {
    # Input:  arg1 directory path
    # Output: stdout text

    checkInput "$@";      # check input arguments
    getFileSizeList "$1"; # make fileSizeList array
    getRandText || die "FATAL:Random point not within any listed file."; # output text
    return 0;
};

main "$@" && exit 0;