# Desc: Output random text selection from a text file within a directory
# Usage: randtxt.sh DIR
# Depends: Bash 5.1.16, GNU findutils 4.8.0, GNU Coreutils 8.32
# Example: randtxt.sh ~/Calibre\ Library/
   # Tunables for the random text sampler.
   SAMPLE=10000;   # ceiling for number of text files to consider
   CONTEXT="5000"; # total bytes +1 of text to display
   # The selection extends CONTEXT/2 bytes (integer division) to either side of
   # the chosen point. When the two halves add up to less than CONTEXT (i.e. a
   # positive odd value), bump CONTEXT by one so it becomes even.
   if (( CONTEXT / 2 + CONTEXT / 2 < CONTEXT )); then
     CONTEXT=$(( CONTEXT + 1 ));
   fi;
  # Print the script path ($0) followed by all arguments, joined into a single
  # line, to stderr. Nothing is written to stdout.
  yell() {
    printf '%s\n' "$0: $*" >&2;
  }
  # Report all arguments through yell() and then terminate the current shell
  # with exit status 111.
  die() {
    yell "$*";
    exit 111;
  }
  # Run the arguments as a command. When that command returns non-zero, pass
  # the message "cannot <args>" to die().
  must() {
    "$@" || die "cannot $*";
  }
  24 } # Display information on how to use this script. 
  27     if [[ $# -gt 1 ]]; then die 
"FATAL:Too many arguments"; fi; 
  28     if [[ $# -lt 1 ]]; then die 
"FATAL:Not enough arguments"; fi; 
  29     if [[ ! -d "$1" ]]; then die 
"FATAL:Not a dir:$1"; else return 0; fi; 
    # Desc: Create list of file sizes and paths
    # input:  arg1   directory path
    # output: array  fileSizeList
  38     declare -g -a fileSizeList
; 
  39     mapfile 
-d '' -t fileSizeList 
< <( 
  40         find -- "$1" -type f 
-name "*.txt" -printf '%s\t%p\0' | shuf 
-z -n"$SAMPLE"; 
  41     ); # Build array by feeding null-delimited lines from `find` to `mapfile` 
  43     declare -g totalSize
=0; 
  45     for i 
in "${!fileSizeList[@]}"; do 
  46         size
="${fileSizeList[i]%%$'\t'*}"; 
  47         totalSize
=$
((totalSize 
+ size
)); 
  49     if [[ $totalSize -le 0 ]]; then die 
"FATAL:Total size is zero."; fi; 
  52     randPoint
="$(shuf -n1 -i0-$((totalSize-1)); )"; 
    # Desc: Print text within fileSizeList around randPoint
    # input: array  fileSizeList
  60     local sum psum i size fileStart filePoint fileEnd selStart selEnd selCount 
file; 
  61     sum=0; # init byte sum 
  63     for i 
in "${!fileSizeList[@]}"; do 
  64         # Move search to next file 
  65         size
="${fileSizeList[i]%%$'\t'*}"; 
  66         sum=$
(( sum + size 
)); 
  68         # Check if point is within file 
  69         if [[ $sum -gt $randPoint ]]; then 
  70             # Calculate important positions within file (zero-indexed) 
  71             fileStart
=0;                              # first byte within file 
  72             filePoint
="$(( randPoint - psum ))";      # point as byte within file 
  73             fileEnd
="$(( size - 1 ))";                # last byte within file 
  74             selStart
="$(( filePoint - CONTEXT/2 ))";  # start of output selection 
  75             selEnd
="$(( filePoint + CONTEXT/2 ))";    # end of output selection 
  76             # Clamp selection start and end indexes 
  77             if [[ $selStart -lt $fileStart ]]; then selStart
=$fileStart; fi; 
  78             if [[ $selEnd -gt $fileEnd ]]; then selEnd
=$fileEnd; fi; 
  79             selCount
="$(( selEnd - selStart + 1 ))";  # number of bytes within selection 
  81             file="$(cut -f2- <<< "${fileSizeList[i]}"; )"; 
  82             printf "INFO:Sample of:%s\n" "$file" 1>&2; 
  83             tail --bytes=+$
((selStart
+1)) -- "$file" | 
head --bytes=$
((selCount
)); 
  87         psum
=$sum; # store previous sum 
  # Mark these shell functions for export to the environment of child processes.
  export -f checkInput getFileSizeList getRandText;
  93     # Input:  arg1    directory path 
  96     checkInput 
"$@";      # check input arguments 
  97     getFileSizeList 
"$1"; # make fileSizeList array 
  98     getRandText
;          # output text