+#!/bin/bash
+# Desc: Outputs text at randomish position within dir of text files
+# Usage: randtxt.sh DIR
+# Version 0.0.4
+# Depends: Bash 5.1.16, GNU findutils 4.8.0, GNU Coreutils 8.32
+
# Upper bound on the number of files kept from the directory listing
# (handed to `shuf -n`).
SAMPLE=10000
# Size of the printed window: CONTEXT/2 bytes before and CONTEXT/2 bytes
# after the chosen point, i.e. up to CONTEXT+1 bytes in total.
CONTEXT="5000"
# An odd CONTEXT would lose a byte to integer division; round it up to the
# next even value so that both halves add up to CONTEXT again.
if (( CONTEXT % 2 == 1 )); then
    CONTEXT=$(( CONTEXT + 1 ))
fi
+
+
yell() {
    # Desc: Print the script path followed by all arguments to stderr.
    # input: args  message words (joined with the first character of IFS)
    # output: stderr  "<script path>: <message>"
    printf '%s: %s\n' "$0" "$*" >&2
}
die() {
    # Desc: Report a message through yell(), then terminate the shell
    #       with exit status 111.
    # input: args  message words
    yell "$*"
    exit 111
}
must() {
    # Desc: Run the arguments as a command; if the command fails, abort
    #       through die() with a message naming the command.
    # input: args  command and its arguments
    if "$@"; then
        return 0
    fi
    die "cannot $*"
}
showUsage() {
    # Desc: Display information on how to use this script.
    # output: stdout  usage text, one printf argument per output line
    printf '%s\n' \
        ' USAGE:' \
        ' randtxt.sh DIR' \
        '' \
        ' EXAMPLE:' \
        ' randtxt.sh ~/'
}
checkInput() {
    # Desc: Validate the script's command-line arguments.
    # input: arg1  directory path (exactly one argument is accepted)
    # output: returns 0 when arg1 is an existing directory;
    #         otherwise terminates through die()
    case $# in
        0) die "FATAL:Not enough arguments" ;;
        1) ;; # exactly one argument: fall through to the directory test
        *) die "FATAL:Too many arguments" ;;
    esac
    [[ -d "$1" ]] || die "FATAL:Not a dir:$1"
    return 0
}
getFileSizeList() {
    # Desc: Build a shuffled list of "*.txt" files (with sizes) found under
    #       a directory and choose a random byte offset into their
    #       combined size.
    # input: arg1  directory path (searched recursively)
    #        var   SAMPLE  maximum number of files kept from the listing
    # output: array fileSizeList  one element per file: "SIZE<TAB>PATH"
    #         var   totalSize     sum of all SIZE fields, in bytes
    #         var   randPoint     random integer within 0..totalSize-1
    # exits: through die() when the files hold no bytes at all, or when
    #        no random point could be produced
    local searchDir="$1"
    local entry size

    # GNU find parses a start point that begins with '-' (or is exactly
    # '!' or '(') as part of the expression, even after '--'. Anchoring
    # such a path with './' keeps it a start point.
    case "$searchDir" in
        -* | '!' | '(') searchDir="./$searchDir" ;;
    esac

    # Build the array from NUL-delimited records so that any file name
    # survives; shuf both randomizes the order and caps the count.
    declare -g -a fileSizeList
    mapfile -d '' -t fileSizeList < <(
        find -- "$searchDir" -type f -name "*.txt" -printf '%s\t%p\0' |
            shuf -z -n"$SAMPLE"
    )

    # Add up the SIZE field (everything before the first tab) of each record.
    declare -g totalSize=0
    for entry in "${fileSizeList[@]}"; do
        size="${entry%%$'\t'*}"
        totalSize=$(( totalSize + size ))
    done
    if [[ $totalSize -le 0 ]]; then die "FATAL:Total size is zero."; fi

    # Pick the byte offset; refuse to continue with an empty or
    # non-numeric result instead of silently treating it as 0 later.
    declare -g randPoint
    if ! randPoint="$(shuf -n1 -i0-$(( totalSize - 1 )))" ||
        [[ ! $randPoint =~ ^[0-9]+$ ]]; then
        die "FATAL:Could not choose random point."
    fi
}
getRandText() {
    # Desc: Print the text surrounding randPoint, where randPoint is a
    #       byte offset into the concatenation of the files listed in
    #       fileSizeList.
    # input: array fileSizeList  elements "SIZE<TAB>PATH"
    #        var   randPoint     byte offset across all listed files
    #        var   CONTEXT       window size; CONTEXT/2 bytes are taken on
    #                            each side of the point
    # output: stdout  "INFO:Sample of:PATH", the selected bytes, a newline
    #         returns 0 after printing a sample; returns 1 (with a message
    #         on stderr) when no listed file contains randPoint
    local sum=0  # bytes covered up to and including the current file
    local psum=0 # bytes covered by the files before the current one
    local entry size file filePoint fileEnd selStart selEnd selCount

    for entry in "${fileSizeList[@]}"; do
        size="${entry%%$'\t'*}"
        sum=$(( sum + size ))

        # The point lies in the first file whose running total exceeds it.
        if [[ $sum -gt $randPoint ]]; then
            # All positions below are zero-indexed bytes within the file.
            filePoint=$(( randPoint - psum ))
            fileEnd=$(( size - 1 ))
            selStart=$(( filePoint - CONTEXT / 2 ))
            selEnd=$(( filePoint + CONTEXT / 2 ))
            # Clamp the selection to the file's first and last byte.
            if (( selStart < 0 )); then selStart=0; fi
            if (( selEnd > fileEnd )); then selEnd=$fileEnd; fi
            selCount=$(( selEnd - selStart + 1 ))

            # Everything after the first tab is the path. Parameter
            # expansion keeps tabs and newlines inside the path intact,
            # which a line-oriented `cut -f2-` would not.
            file="${entry#*$'\t'}"
            printf "INFO:Sample of:%s\n" "$file"
            # tail's +N is one-indexed, hence selStart+1.
            tail --bytes=+$(( selStart + 1 )) -- "$file" | head --bytes="$selCount"
            printf "\n"
            return 0
        fi
        psum=$sum
    done

    yell "ERROR:No listed file contains point:${randPoint}"
    return 1
}
# Mark the worker functions for export, one per line, so that bash
# processes started from this script inherit their definitions.
export -f checkInput
export -f getFileSizeList
export -f getRandText
+
main() {
    # Desc: Script entry point; validates the arguments, indexes the text
    #       files, then prints the random sample.
    # input: arg1  directory path
    # output: stdout  text
    #         always returns 0 (fatal errors leave through die() instead)
    local targetDir="$1"

    checkInput "$@"              # check input arguments
    getFileSizeList "$targetDir" # make fileSizeList array
    getRandText                  # output text
    return 0
}
+
# Run main with the script's arguments. On success leave immediately; a bare
# `exit` reuses the status of the last command, which is main's 0 here.
main "$@" && exit