# Desc: Output random text selection from a text file within a directory
# Usage: randtxt.sh DIR
# Depends: Bash 5.1.16, GNU findutils 4.8.0, GNU Coreutils 8.32
# Example: randtxt.sh ~/Calibre\ Library/
# Tunables read by the functions below.
SAMPLE=10000;   # ceiling for number of text files to consider
CONTEXT="5000"; # total bytes +1 of text to display
# The selection spans CONTEXT/2 bytes on each side of the chosen byte.
# Integer division drops the remainder of an odd value, so bump an odd
# CONTEXT up by one to make both halves add up to CONTEXT again.
if (( CONTEXT/2 + CONTEXT/2 < CONTEXT )); then
  CONTEXT=$(( CONTEXT + 1 ));
fi;
# Desc: Print the script path followed by all arguments to stderr.
# Usage: yell WORD...
# Output: stderr: "<script path>: <args joined by spaces>"
yell() {
  # printf instead of echo: output does not depend on echo's option or
  # backslash handling.
  printf '%s: %s\n' "$0" "$*" >&2;
} # print script path and all args to stderr
# Desc: Report a message through yell() and terminate the shell.
# Usage: die WORD...
# Output: stderr: message as formatted by yell()
# Exit: always 111
# Depends: yell()
die() {
  yell "$*";
  exit 111;
} # same as yell() but non-zero exit status
# Desc: Run the arguments as a command; abort via die() if it fails.
# Usage: must COMMAND [ARG...]
# Output: whatever COMMAND writes; on failure die() reports "cannot <args>"
# Depends: die()
must() {
  "$@" || die "cannot $*";
} # runs args as command, reports args if command fails
24 } # Display information on how to use this script.
# Desc: Validate the positional arguments
# Usage: checkInput "$@"
# Input: args  must be exactly one argument naming an existing directory
# Output: none on success (returns 0); otherwise die() reports one of
#         "FATAL:Too many arguments", "FATAL:Not enough arguments",
#         "FATAL:Not a dir:<arg1>"
# Depends: die()
checkInput() {
  if [[ $# -gt 1 ]]; then die "FATAL:Too many arguments"; fi;
  if [[ $# -lt 1 ]]; then die "FATAL:Not enough arguments"; fi;
  if [[ ! -d "$1" ]]; then die "FATAL:Not a dir:$1"; fi;
  return 0;
} # Check input arguments
# Desc: Create list of file sizes and paths
# Usage: getFileSizeList DIR
# Input: arg1  directory path
#        var   SAMPLE  ceiling for number of files kept (10000 if unset)
# Output: array fileSizeList  one "SIZE<TAB>PATH" element per *.txt file
#         var   totalSize     sum of the SIZE fields
#         var   randPoint     random integer in [0, totalSize-1]
# Depends: GNU find (-printf), shuf, die()
getFileSizeList() {
  local entry size;

  declare -g -a fileSizeList;
  # Build array by feeding null-delimited records from `find` to `mapfile`.
  # shuf -z randomizes the records and keeps at most SAMPLE of them.
  mapfile -d '' -t fileSizeList < <(
    find -- "$1" -type f -name "*.txt" -printf '%s\t%p\0' |
      shuf -z -n"${SAMPLE:-10000}";
  );

  # Sum the size field (everything before the first tab) of every record.
  declare -g totalSize=0;
  for entry in "${fileSizeList[@]}"; do
    size="${entry%%$'\t'*}";
    totalSize=$(( totalSize + size ));
  done;
  if [[ $totalSize -le 0 ]]; then die "FATAL:Total size is zero."; fi;

  # Pick one byte position uniformly from the concatenation of all files.
  declare -g randPoint;
  randPoint="$(shuf -n1 -i0-$(( totalSize - 1 )))";
} # Create list of file sizes and paths
# Desc: Print text within fileSizeList around randPoint
# Usage: getRandText
# Input: array fileSizeList  elements of form "SIZE<TAB>PATH"
#        var   randPoint     byte offset into the concatenation of all files
#        var   CONTEXT       selection spans CONTEXT/2 bytes on each side
# Output: stdout: the selected bytes of the file containing randPoint
#         stderr: "INFO:Sample of:<path>"
# Depends: tail, head (--bytes)
getRandText() {
  local sum psum entry size fileStart filePoint fileEnd;
  local selStart selEnd selCount file;
  sum=0;  # running byte total including the current file
  psum=0; # running byte total before the current file

  for entry in "${fileSizeList[@]}"; do
    # Move search to next file
    size="${entry%%$'\t'*}";
    sum=$(( sum + size ));

    # Check if point is within file
    if [[ $sum -gt $randPoint ]]; then
      # Calculate important positions within file (zero-indexed)
      fileStart=0;                          # first byte within file
      filePoint=$(( randPoint - psum ));    # point as byte within file
      fileEnd=$(( size - 1 ));              # last byte within file
      selStart=$(( filePoint - CONTEXT/2 )); # start of output selection
      selEnd=$(( filePoint + CONTEXT/2 ));   # end of output selection

      # Clamp selection start and end indexes
      if [[ $selStart -lt $fileStart ]]; then selStart=$fileStart; fi;
      if [[ $selEnd -gt $fileEnd ]]; then selEnd=$fileEnd; fi;
      selCount=$(( selEnd - selStart + 1 )); # number of bytes in selection

      # Path is everything after the first tab. Parameter expansion keeps
      # the path intact (a command substitution would strip trailing
      # newlines) and avoids forking `cut`.
      file="${entry#*$'\t'}";
      printf "INFO:Sample of:%s\n" "$file" 1>&2;
      # tail's +N is one-indexed, hence selStart+1.
      tail --bytes=+$(( selStart + 1 )) -- "$file" |
        head --bytes="$selCount";
      # Stop here: every later file would also satisfy sum > randPoint.
      break;
    fi;

    psum=$sum; # store previous sum
  done;
} # Print text within fileSizeList around randPoint
# Make the worker functions available to child bash processes.
export -f checkInput getFileSizeList getRandText;

# Run program logic
# Input: arg1 directory path
checkInput "$@";      # check input arguments
getFileSizeList "$1"; # make fileSizeList array
getRandText;          # output text