]> zdv2.bktei.com Git - BK-2020-03.git/blob - user/randtxt.sh
c809246341373c8c6bc04d2c0434bcf2a0962774
[BK-2020-03.git] / user / randtxt.sh
1 #!/bin/bash
2 # Desc: Outputs text at randomish position within dir of text files
3 # Usage: randtxt.sh DIR
4 # Version 0.0.4
5 # Depends: Bash 5.1.16, GNU findutils 4.8.0, GNU Coreutils 8.32
6
# Upper bound on the number of files kept by the `shuf -n` sampling step.
SAMPLE=10000
# Width in bytes of the output window: CONTEXT/2 bytes are printed on each
# side of the randomly chosen byte (so CONTEXT+1 bytes in total).
CONTEXT=5000
# Round an odd width up to the next even number.
if (( CONTEXT % 2 == 1 )); then
    CONTEXT=$(( CONTEXT + 1 ))
fi
10
11
# Print the script path ($0) followed by all arguments to stderr.
yell() {
    printf '%s\n' "$0: $*" >&2
}
# Report all arguments through yell(), then terminate with exit status 111.
die() {
    yell "$*"
    exit 111
}
# Run the arguments as a command; if it fails, abort through die() with a
# message naming the command that could not be run.
must() {
    if ! "$@"; then
        die "cannot $*"
    fi
}
# Display information on how to use this script (written to stdout).
showUsage() {
    printf '%s\n' \
        'USAGE:' \
        'randtxt.sh DIR' \
        '' \
        'EXAMPLE:' \
        'randtxt.sh ~/'
}
checkInput() {
    # Desc: Validate the script's command-line arguments.
    # Input: all script arguments; exactly one is expected and it must
    #        name an existing directory.
    # Return: 0 when the arguments are valid; otherwise aborts via die().
    case $# in
        0) die "FATAL:Not enough arguments" ;;
        1) ;;
        *) die "FATAL:Too many arguments" ;;
    esac
    [[ -d "$1" ]] || die "FATAL:Not a dir:$1"
    return 0
}
getFileSizeList() {
    # Desc: Build a list of sizes and paths of *.txt files and pick a
    #       random byte offset within their combined size.
    # Input:  arg1  directory path to search
    #         var   SAMPLE        maximum number of files to keep
    # Output: array fileSizeList  entries of the form "<size><TAB><path>"
    #         var   totalSize     sum of the sizes in fileSizeList
    #         var   randPoint     random integer in [0, totalSize-1]
    # Exits via die() when the combined size is not positive.
    local dir="$1"
    local entry bytes

    # find emits NUL-terminated "<size><TAB><path>" records; shuf shuffles
    # them and keeps at most SAMPLE of them; mapfile stores one per element.
    declare -g -a fileSizeList
    mapfile -d '' -t fileSizeList < <(
        find -- "$dir" -type f -name "*.txt" -printf '%s\t%p\0' \
            | shuf -z -n"$SAMPLE"
    )

    # Sum the size field (everything before the first tab) of every record.
    declare -g totalSize=0
    for entry in "${fileSizeList[@]}"; do
        bytes="${entry%%$'\t'*}"
        totalSize=$(( totalSize + bytes ))
    done
    (( totalSize > 0 )) || die "FATAL:Total size is zero."

    # Choose one zero-based byte position within the combined size.
    declare -g randPoint
    randPoint="$(shuf -n1 -i0-$(( totalSize - 1 )))"
}
getRandText() {
    # Desc: Print a window of bytes around randPoint, treating the files in
    #       fileSizeList as one concatenated byte stream.
    # Input:  array fileSizeList  entries of the form "<size><TAB><path>"
    #         var   randPoint     zero-based byte offset into that stream
    #         var   CONTEXT       window width; CONTEXT/2 bytes each side
    # Output: stdout: a line "INFO:Sample of:<path>", the selected bytes,
    #         then a newline.
    # Return: 0 after printing a sample; 1 if randPoint lies beyond the
    #         combined size of the listed files (nothing is printed).
    local entry size path
    local sum=0 psum=0   # running byte totals: including / excluding current file
    local filePoint fileEnd selStart selEnd selCount

    for entry in "${fileSizeList[@]}"; do
        # Advance the running total past this file.
        size="${entry%%$'\t'*}"
        sum=$(( sum + size ))

        # The first file whose cumulative end passes randPoint contains it.
        if (( sum > randPoint )); then
            # Positions within the file are zero-indexed.
            filePoint=$(( randPoint - psum ))        # chosen byte within file
            fileEnd=$(( size - 1 ))                  # last byte within file
            selStart=$(( filePoint - CONTEXT / 2 ))  # first byte of selection
            selEnd=$(( filePoint + CONTEXT / 2 ))    # last byte of selection

            # Clamp the selection to the bounds of the file.
            if (( selStart < 0 )); then selStart=0; fi
            if (( selEnd > fileEnd )); then selEnd=$fileEnd; fi
            selCount=$(( selEnd - selStart + 1 ))    # bytes in selection

            # The path is everything after the first tab. Parameter
            # expansion keeps embedded tabs and newlines intact, which a
            # line-oriented `cut` inside a command substitution does not.
            path="${entry#*$'\t'}"

            printf 'INFO:Sample of:%s\n' "$path"
            # tail's "+N" form is one-indexed, hence selStart + 1.
            tail --bytes=+"$(( selStart + 1 ))" -- "$path" \
                | head --bytes="$selCount"
            printf '\n'
            return 0
        fi
        psum=$sum   # remember the total before the next file
    done

    # randPoint was not inside any listed file.
    return 1
}
# Mark the worker functions for export to child processes.
export -f checkInput
export -f getFileSizeList
export -f getRandText
102
main() {
    # Desc: Script entry point; runs the three stages in order.
    # Input:  arg1  directory path (all arguments are passed to checkInput)
    # Output: stdout text produced by getRandText
    # Return: 0

    # Stage 1: validate the command-line arguments.
    checkInput "$@"

    # Stage 2: build the fileSizeList array for the directory.
    getFileSizeList "$1"

    # Stage 3: print the text sample.
    getRandText

    return 0
}
112
# Run main with the script's arguments and exit with its status.
main "$@"
exit "$?"