Commit | Line | Data |
---|---|---|
22772fe8 SBS |
1 | #!/usr/bin/env bash |
2 | # Desc: Mixes input lines while also preserving some neighbors | |
3 | # Usage: cat file | bkshuf arg1 | |
4 | # Version 0.0.1 | |
5 | # Depends: bc 1.07.1, GNU Coreutils 8.32 (shuf) | |
6 | # Input: arg1: (optional) maximum number of lines to output; stdin: lines to mix | |
7 | ||
# Tuning parameters for the group-size calculation in main(): an input of
# BKSHUF_PARAM_LINEC lines yields a group size of BKSHUF_PARAM_GSIZE lines.
# Both are constants, so they are marked readonly.
readonly BKSHUF_PARAM_LINEC=1000000; # reference input line count
readonly BKSHUF_PARAM_GSIZE=25;      # lines per group for BKSHUF_PARAM_LINEC lines of input
10 | ||
11 | ||
# yell: print the script path ($0) and all args to stderr.
# printf is used instead of echo so that backslashes in the message are
# always printed literally, whatever the shell's echo settings are.
yell() { printf '%s\n' "$0: $*" >&2; }
# die: same as yell, then terminate the script with exit status 111.
die() { yell "$*"; exit 111; }
# must: run args as a command; if it fails, die reporting "cannot <args>".
must() { "$@" || die "cannot $*"; }
read_stdin() {
    # Desc: Consumes stdin; outputs as stdout lines
    # Input: stdin (consumed; not read when stdin is a terminal)
    # Output: stdout (newline delimited; nothing at all for empty input)
    # Example: printf "foo\nbar\n" | read_stdin
    # Depends: GNU bash (version 5.1.16)
    # Version: 0.0.1
    local line;
    local -a output=();

    # Store stdin as output array elements
    ## Accept pipes and redirected files alike; only an interactive
    ## terminal is skipped so the function never waits for typed input.
    if [[ ! -t 0 ]]; then
        ## `IFS=` keeps leading/trailing whitespace of each line; the
        ## `|| [[ -n ... ]]` clause keeps a last line lacking a newline.
        while IFS= read -r line || [[ -n $line ]]; do
            output+=("$line");
        done;
    fi;

    # Print to stdout
    ## Guarded because printf with an empty argument list would still
    ## emit a single blank line.
    if [[ ${#output[@]} -gt 0 ]]; then
        printf "%s\n" "${output[@]}";
    fi;
}; # read stdin to stdout lines
checkInt() {
    # Desc: Checks if arg is integer (one or more decimal digits; no sign,
    #       no decimal point, no whitespace)
    # Usage: checkInt arg
    # Input: arg: string to test
    # Output: - return code 0 (if arg is integer)
    #         - return code 1 (if arg is not integer)
    #         - calls die() if not given exactly one argument
    # Example: if ! checkInt $arg; then echo "not int"; fi;
    # Version: 0.0.1
    local re_digits; # kept local so no global variable is created or overwritten

    #===Process Arg===
    if [[ $# -ne 1 ]]; then
        die "ERROR:Invalid number of arguments:$#";
    fi;

    #===Determine function return code===
    re_digits='^[0-9]+$'; # Regular Expression to test
    if [[ $1 =~ $re_digits ]]; then
        return 0;
    fi;
    return 1;
} # Checks if arg is integer
consume_line() {
    # Desc: Outputs and destroys one line from list_input, searching
    #       forward from index ip and wrapping around to index 0
    # Usage: consume_line;
    # Input: var: list_input  array  input lines (consumed elements are unset)
    #        var: lco         int    line count original
    #        var: lcr         int    line count remaining
    #        var: ip          int    list_input index pointer
    #        var: lc_out      int    count of lines output so far
    # Output: stdout: a single non-blank element from list_input
    #         var: list_input  one element destroyed
    #         var: ip          left at the index of the consumed element
    #         var: lc_out      incremented once
    #         var: lcr         decremented once
    #         return code 0 if a line was consumed; 1 if none was found
    local n line;

    # Nothing to search; also avoids a modulo by zero below
    if [[ $lco -le 0 ]]; then return 1; fi;

    # Bring the pointer into the valid index range 0 .. lco-1
    ip=$(( ${ip:-0} % lco ));

    ### Visit each of the lco valid indices at most once
    for (( n = 0; n < lco; n++ )); do
        #### check if line at ip is blank
        line="${list_input[$ip]}";
        if [[ -n $line ]]; then
            ##### consume line at index ip
            ##### (printed in the foreground so that the order of the
            ##### output matches the order of consumption)
            printf "%s\n" "$line";
            unset "list_input[$ip]"; # destroy line in list_input array
            lc_out=$(( lc_out + 1 ));
            lcr=$(( lcr - 1 ));      # decrement line count remaining lcr
            return 0;
        fi;
        #### increment input index pointer, looping around if necessary
        ip=$(( (ip + 1) % lco ));
    done;
    return 1;
}; # consume and output line in list_input array starting at index ip
decide_read() {
    # Desc: Decides whether to read another line in list_input array
    #       by comparing $RANDOM to p_seq_int
    # Usage: if decide_read; then something; fi;
    # Input: var: p_seq_int  probability (int [0 32767])
    # Output: return code 0 if $RANDOM is less than p_seq_int;
    #         return code 1 otherwise, including when p_seq_int is unset
    #         or is not a string of decimal digits
    local re_int;

    # Only digits are allowed through to the arithmetic comparison, so a
    # malformed value yields "do not read" instead of an evaluation error.
    re_int='^[0-9]+$';
    if [[ ! ${p_seq_int:-} =~ $re_int ]]; then
        return 1;
    fi;

    # 10# forces base 10 so a leading zero is not taken as octal
    if (( RANDOM < 10#$p_seq_int )); then
        return 0;
    fi;
    return 1;
}; # returns 0 with probability p_seq; 1 otherwise
113 | ||
main() {
    # Desc: Reads lines from stdin and prints them in a mixed order in which
    #       runs of neighboring lines tend to stay together
    # Usage: main [arg1]
    # Input: arg1: (optional) maximum number of lines to output; defaults
    #              to the number of stored input lines
    #        stdin: input lines (via read_stdin; blank lines are skipped)
    #        var: BKSHUF_PARAM_LINEC, BKSHUF_PARAM_GSIZE
    # Output: stdout: at most arg1 of the input lines
    #         calls die() on a non-integer arg1 or a failed bc/shuf call
    # Depends: read_stdin, checkInt, consume_line, decide_read, die, bc, shuf
    local par_l0 par_s0 s_exp s p_jump p_seq line re_int;
    # State shared with consume_line and decide_read; they can read and
    # modify these locals because they are called from within main.
    local lc_out_max lco lcr lc_out ip p_seq_int;
    local -a list_input=();

    # Check positional arguments
    if [[ $# -gt 0 ]]; then
        if ! checkInt "$1"; then
            die "FATAL:Not an integer:$1";
        fi;
        # 10# forces base 10 so that e.g. "08" means eight instead of
        # being rejected as an invalid octal number in later comparisons
        lc_out_max=$(( 10#$1 )); # output line count
    fi;

    # store input lines from stdin (like `shuf`)
    while IFS= read -r line || [[ -n $line ]]; do
        # skip blank lines (empty or whitespace only)
        if [[ $line =~ ^[[:space:]]*$ ]]; then continue; fi;
        list_input+=("$line");
    done < <( read_stdin; );

    # calc original input line count; with no input there is nothing to
    # output and no valid index range to draw from
    lco="${#list_input[@]}";
    if [[ $lco -eq 0 ]]; then return 0; fi;

    # calculate group size s
    # (s is 1 for a single input line and par_s0 for par_l0 input lines)
    par_l0="$BKSHUF_PARAM_LINEC";
    par_s0="$BKSHUF_PARAM_GSIZE";
    s_exp="(( $par_s0 - 1 )/( ( l( $par_l0 ) )^2 ))*(l( $lco ))^2+1";
    s="$(echo "scale=12; $s_exp" | bc -l)";

    # calculate probabilities p_jump, p_seq
    p_jump="$(echo "scale=12; 1 / ( $s )" | bc -l)";
    p_seq="$(echo "scale=12; 1 - $p_jump" | bc -l)";
    p_seq_int="$(echo "scale=0; ($p_seq * 32767)/1" | bc -l)"; # p_seq as int [0 32767] for $RANDOM range
    re_int='^[0-9]+$';
    if [[ ! $p_seq_int =~ $re_int ]]; then
        die "FATAL:Could not calculate p_seq_int (is bc installed?):$p_seq_int";
    fi;

    # generate output
    lcr="$lco";   # line count remaining
    lc_out="0";   # init output line counter
    if [[ -z "$lc_out_max" ]]; then lc_out_max="$lco"; fi;
    while [[ $lcr -ge 1 ]] && [[ $lc_out -lt $lc_out_max ]]; do
        ## Select random unconsumed input line and consume it to output
        ip="$(shuf -i "0-$(( lco - 1 ))" -n1)" || die "FATAL:shuf failed";
        consume_line;

        ## Consume the next sequential line with probability p_seq.
        while decide_read && [[ $lcr -ge 1 ]] && [[ $lc_out -lt $lc_out_max ]]; do
            consume_line;
        done;
    done;

    # Do not return before any background output jobs have finished
    wait;
}; # main program
173 | ||
# Entry point: run main with all command-line arguments of the script.
main "$@";
175 | ||
176 | # Author: Steven Baltakatei Sandoval | |
177 | # License: GPLv3+ | |
178 | ||
179 | # Dependency information | |
180 | ||
181 | # bc 1.07.1 | |
182 | # Copyright 1991-1994, 1997, 1998, 2000, 2004, 2006, 2008, 2012-2017 Free Software Foundation, Inc. | |
183 | ||
184 | ||
185 | # shuf (GNU coreutils) 8.32 | |
186 | # Copyright (C) 2020 Free Software Foundation, Inc. | |
187 | # License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>. | |
188 | # This is free software: you are free to change and redistribute it. | |
189 | # There is NO WARRANTY, to the extent permitted by law. | |
190 | ||
191 | # Written by Paul Eggert. | |
192 | ||
193 |