# Desc: Outputs text from a random-ish position within a directory of text files
# Usage: randtxt.sh DIR
# Depends: Bash 5.1.16, GNU findutils 4.8.0, GNU Coreutils 8.32
# CONTEXT: size of the printed window in bytes. CONTEXT/2 bytes are taken
# before the chosen point and CONTEXT/2 after it, plus the byte at the point.
CONTEXT="5000";
# Integer division drops the remainder of an odd value; bump odd values to the
# next even number so that both halves add up to CONTEXT exactly.
if (( CONTEXT / 2 + CONTEXT / 2 < CONTEXT )); then CONTEXT=$(( CONTEXT + 1 )); fi;
# yell ARGS...: print the script path followed by all arguments to stderr.
yell() { printf '%s: %s\n' "$0" "$*" >&2; }
# die ARGS...: same as yell(), then terminate the shell with exit status 111.
die() { yell "$*"; exit 111; }
# must CMD [ARGS...]: run the arguments as a command; if it fails, report the
# command line through die() (which exits with status 111).
must() { "$@" || die "cannot $*"; }
23 } # Display information on how to use this script.
checkInput() {
    # Desc: Validate the positional arguments
    # Input: args  must be exactly one argument naming an existing directory
    # Output: returns 0 when the arguments are valid; otherwise calls die()
    #         with a FATAL message
    # Depends: die()
    if [[ $# -gt 1 ]]; then die "FATAL:Too many arguments"; fi;
    if [[ $# -lt 1 ]]; then die "FATAL:Not enough arguments"; fi;
    if [[ ! -d "$1" ]]; then die "FATAL:Not a dir:$1"; fi;
    #yell "DEBUG:checkInput() finished."; # debug
    return 0;
};
getFileSizeList() {
    # Desc: Create list of file sizes and paths
    # input: arg1 directory path
    #        var SAMPLE  (optional) maximum number of files to keep; when
    #                    empty or unset, every matching file is kept
    # output: array fileSizeList  one "SIZE<TAB>PATH" element per *.txt file
    #         var totalSize       sum of the SIZE fields
    #         var randPoint       random integer in the range 0..totalSize-1
    # depends: GNU find (-printf), GNU shuf, bash mapfile -d, die()
    local i size;
    local -a shufArgs;

    # Only pass -n when SAMPLE holds a value; an empty count makes shuf fail.
    shufArgs=(-z);
    if [[ -n "${SAMPLE:-}" ]]; then shufArgs+=("-n$SAMPLE"); fi;

    # Build array by feeding null-delimited records from `find` to `mapfile`
    declare -g -a fileSizeList;
    mapfile -d '' -t fileSizeList < <(
        find -- "$1" -type f -name "*.txt" -printf '%s\t%p\0' \
            | shuf "${shufArgs[@]}";
    );
    #declare -p fileSizeList; # debug

    # Sum the size field (text before the first tab) of every record
    declare -g totalSize=0;
    for i in "${!fileSizeList[@]}"; do
        size="${fileSizeList[i]%%$'\t'*}";
        totalSize=$(( totalSize + size ));
        #declare -p i size totalSize; # debug
    done;
    if [[ $totalSize -le 0 ]]; then die "FATAL:Total size is zero."; fi;

    # Pick a byte offset within the concatenation of all listed files
    declare -g randPoint;
    randPoint="$(shuf -n1 -i0-$((totalSize-1)); )";
    #declare -p fileSizeList totalSize randPoint 1>&2;
    #yell "DEBUG:fileSizeList element count:${#fileSizeList[@]}"; # debug
    #yell "DEBUG:getFileSizeList() finished."; # debug
};
getRandText() {
    # Desc: Print text within fileSizeList around randPoint
    # input: array fileSizeList  "SIZE<TAB>PATH" elements
    #        var randPoint       byte offset counted across all listed files
    #        var CONTEXT         window size; CONTEXT/2 bytes each side of point
    # output: stdout  one "INFO:Sample of:PATH" line, then the selected bytes
    # depends: GNU tail, GNU head
    local sum psum i size fileStart filePoint fileEnd selStart selEnd selCount file;
    sum=0;  # running byte sum including the current file
    psum=0; # running byte sum excluding the current file

    for i in "${!fileSizeList[@]}"; do
        # Move search to next file
        size="${fileSizeList[i]%%$'\t'*}";
        sum=$(( sum + size ));

        # Check if point is within file
        if (( sum > randPoint )); then
            # Calculate important positions within file (zero-indexed)
            fileStart=0;                           # first byte within file
            filePoint=$(( randPoint - psum ));     # point as byte within file
            fileEnd=$(( size - 1 ));               # last byte within file
            selStart=$(( filePoint - CONTEXT/2 )); # start of output selection
            selEnd=$(( filePoint + CONTEXT/2 ));   # end of output selection

            # Clamp selection start and end indexes
            if (( selStart < fileStart )); then selStart=$fileStart; fi;
            if (( selEnd > fileEnd )); then selEnd=$fileEnd; fi;
            selCount=$(( selEnd - selStart + 1 )); # number of bytes within selection

            # Path is everything after the first tab. Parameter expansion keeps
            # embedded and trailing newlines intact, unlike `cut` in $(...).
            file="${fileSizeList[i]#*$'\t'}";
            printf "INFO:Sample of:%s\n" "$file";
            # tail's +N is one-indexed, hence selStart + 1
            tail --bytes=+$(( selStart + 1 )) -- "$file" | head --bytes="$selCount";
            break; # only the file containing randPoint is sampled
        fi;

        psum=$sum; # store previous sum
    done;
    #yell "DEBUG:getRandText() finished."; # debug
};
# Make the worker functions available to child bash processes.
export -f checkInput getFileSizeList getRandText;

# Input: arg1 directory path
# Output: stdout text
checkInput "$@";      # check input arguments
getFileSizeList "$1"; # make fileSizeList array
getRandText;          # output text