]> zdv2.bktei.com Git - BK-2020-03.git/blob - user/randtxt.sh
feat(user/mw_wc2sp.sh):Use top and bottom nav links
[BK-2020-03.git] / user / randtxt.sh
1 #!/bin/bash
2 # Desc: Output random text selection from a text file within a directory
3 # Usage: randtxt.sh DIR
4 # Version 0.0.6
5 # Depends: Bash 5.1.16, GNU findutils 4.8.0, GNU Coreutils 8.32
6 # Example: randtxt.sh ~/Calibre\ Library/
7
# Tunable settings
SAMPLE=10000;   # upper bound on how many text files are considered
CONTEXT="5000"; # selection window; window is CONTEXT/2 bytes either side of the chosen byte
# Integer division drops the remainder of an odd CONTEXT; bump it up by one
# in that case so the two halves add up to the configured value.
if (( CONTEXT/2 + CONTEXT/2 < CONTEXT )); then CONTEXT=$(( CONTEXT + 1 )); fi;
11
12
# yell: print the script path followed by all args to stderr.
# printf is used instead of echo so the message is emitted verbatim even when
# the shell has xpg_echo enabled (echo would then interpret backslashes).
yell() { printf '%s: %s\n' "$0" "$*" >&2; }
# die: same as yell() but terminates the script with exit status 111.
die() { yell "$*"; exit 111; }
# must: run args as a command; if it fails, report the command via die().
must() { "$@" || die "cannot $*"; }
showUsage() {
    # Desc: Print a short usage summary (call syntax plus one example) to stdout
    # Input: none
    # Output: stdout  usage text, one item per line
    printf '%s\n' \
        'USAGE:' \
        'randtxt.sh DIR' \
        '' \
        'EXAMPLE:' \
        'randtxt.sh ~/';
} # Display information on how to use this script.
checkInput() {
    # Desc: Validate the script arguments: exactly one argument that names
    #   an existing directory
    # Input: args  all command-line arguments passed to the script
    # Output: stderr  usage summary (wrong argument count only) and a
    #           FATAL message describing the problem
    # Return: 0 if the input is valid; otherwise the script is terminated
    #   through die()
    # Depends: die(), showUsage()
    if (( $# != 1 )); then
        showUsage 1>&2; # wrong call shape: remind the user how to call the script
        if (( $# > 1 )); then die "FATAL:Too many arguments"; fi;
        die "FATAL:Not enough arguments";
    fi;
    if [[ ! -d "$1" ]]; then die "FATAL:Not a dir:$1"; fi;
    return 0;
};
getFileSizeList() {
    # Desc: Create list of file sizes and paths for *.txt files below a
    #   directory, then choose a random byte offset into their combined size
    # Input: arg1        directory path
    #        var SAMPLE  maximum number of files kept in the list
    # Output: array fileSizeList  elements of the form "SIZE<TAB>PATH"
    #         var totalSize       sum of the sizes (bytes) of the listed files
    #         var randPoint       random integer within 0..totalSize-1
    # Return: 0 on success; terminates the script through die() if no bytes
    #   were found or no random point could be drawn
    # Depends: die(), GNU find (-printf), GNU shuf (-z, -i)
    local startDir entry size;

    # `find` parses a starting point that begins with '-' (or is a lone
    # '(' or '!') as part of its expression, even when preceded by `--`.
    # Anchoring such a name with './' forces it to be read as a path.
    startDir="$1";
    case "$startDir" in
        -*|'('|'!') startDir="./$startDir";;
    esac;

    declare -g -a fileSizeList=();
    mapfile -d '' -t fileSizeList < <(
        find "$startDir" -type f -name "*.txt" -printf '%s\t%p\0' | shuf -z -n"$SAMPLE";
    ); # Build array by feeding null-delimited records from `find` to `mapfile`

    declare -g totalSize=0;
    for entry in "${fileSizeList[@]}"; do
        size="${entry%%$'\t'*}"; # text before the first tab is the byte size
        totalSize=$(( totalSize + size ));
    done;
    if [[ $totalSize -le 0 ]]; then die "FATAL:Total size is zero."; fi;

    declare -g randPoint;
    randPoint="$(shuf -n1 -i0-$((totalSize-1)); )";
    # An empty or non-numeric value would silently be treated as offset 0 later.
    if [[ ! "$randPoint" =~ ^[0-9]+$ ]]; then die "FATAL:Could not choose random point."; fi;
};
getRandText() {
    # Desc: Print text within fileSizeList around randPoint. randPoint is
    #   treated as a byte offset into the listed files laid end to end; the
    #   file containing that byte is located and a window of CONTEXT/2 bytes
    #   on either side of it (clamped to the file) is printed.
    # Input: array fileSizeList  elements of the form "SIZE<TAB>PATH"
    #        var randPoint       byte offset to centre the selection on
    #        var CONTEXT         window width (half is taken on each side)
    # Output: stdout  the selected bytes followed by a newline
    #         stderr  INFO line naming the sampled file, or an error message
    # Return: 0 on success; 1 if the chosen file cannot be read or randPoint
    #   lies beyond the listed files
    # Depends: yell(), GNU Coreutils tail and head (--bytes)
    local sum psum entry size filePoint fileEnd selStart selEnd selCount file;
    sum=0;  # bytes counted up to and including the current file
    psum=0; # bytes counted before the current file
    for entry in "${fileSizeList[@]}"; do
        size="${entry%%$'\t'*}";
        sum=$(( sum + size ));

        # Check if point is within file
        if (( sum > randPoint )); then
            # Positions within the file (zero-indexed)
            filePoint=$(( randPoint - psum ));      # point as byte within file
            fileEnd=$(( size - 1 ));                # last byte within file
            selStart=$(( filePoint - CONTEXT/2 ));  # start of output selection
            selEnd=$(( filePoint + CONTEXT/2 ));    # end of output selection
            # Clamp selection to the file boundaries
            if (( selStart < 0 )); then selStart=0; fi;
            if (( selEnd > fileEnd )); then selEnd=$fileEnd; fi;
            selCount=$(( selEnd - selStart + 1 ));  # number of bytes within selection

            # Everything after the first tab is the path. Parameter expansion
            # keeps tabs and newlines inside the path intact, which a
            # line-oriented `cut` in a command substitution would not.
            file="${entry#*$'\t'}";
            if [[ ! -r "$file" ]]; then
                yell "ERROR:Cannot read file:$file";
                return 1;
            fi;

            # Output context
            printf "INFO:Sample of:%s\n" "$file" 1>&2;
            tail --bytes=+$(( selStart + 1 )) -- "$file" | head --bytes="$selCount";
            printf "\n";
            return 0;
        fi;
        psum=$sum; # store previous sum
    done;

    # No file contained randPoint; report it instead of returning success.
    yell "ERROR:Random point outside of listed files:$randPoint";
    return 1;
};
# Mark the worker functions for export to the environment of child processes.
export -f checkInput;
export -f getFileSizeList;
export -f getRandText;
91
main() {
    # Desc: Program entry point: validate the arguments, index the text
    #   files below the given directory and print a random excerpt
    # Input: arg1  directory path
    # Output: stdout  text
    # Return: 0 on success; otherwise the status of the first step that
    #   failed (previously 0 was returned unconditionally)
    # Depends: checkInput(), getFileSizeList(), getRandText()
    checkInput "$@" || return;       # check input arguments
    getFileSizeList "$1" || return;  # make fileSizeList array
    getRandText || return;           # output text
    return 0;
};
101
# Run the program: exit 0 when main succeeds, otherwise finish with main's status.
main "$@" || exit "$?";
exit 0;