]>
zdv2.bktei.com Git - BK-2020-03.git/blob - unitproc/bkshuf
   2 # Desc: Mixes input lines while also preserving some neighbors 
   3 # Usage: cat file | bkshuf arg1 
   5 # Depends: bc 1.07.1, GNU Coreutils 8.32 (shuf) 
   6 # Input: var: arg1  initial lines to output 
   8 trap 'exit;' SIGPIPE
; # exit early if stdout not being read 
  11 ## For these numbers of lines of input... 
  12 if [[ ! -v BKSHUF_PARAM_LINEC 
]]; then BKSHUF_PARAM_LINEC
=1000000; fi; 
  13 ##   ... target this group size. 
  14 if [[ ! -v BKSHUF_PARAM_GSIZE 
]]; then BKSHUF_PARAM_GSIZE
=25; fi;  
  17 yell
() { echo "$0: $*" >&2; } # print script path and all args to stderr 
  18 die
() { yell 
"$*"; exit 111; } # same as yell() but non-zero exit status 
  19 must
() { "$@" || die 
"cannot $*"; } # runs args as command, reports args if command fails 
  21     # Desc: Consumes stdin; outputs as stdout lines 
  22     # Input: stdin (consumes) 
  23     # Output: stdout (newline delimited) 
  24     # Example: printf "foo\nbar\n" | read_stdin 
  25     # Depends: GNU bash (version 5.1.16) 
  27     local input_stdin output
; 
  30     if [[ -p /dev
/stdin 
]]; then 
  31         input_stdin
="$(cat -)"; 
  34     # Store as output array elements 
  36     if [[ -n $input_stdin ]]; then 
  37         while read -r line
; do 
  39         done < <(printf "%s\n" "$input_stdin"); 
  43     printf "%s\n" "${output[@]}"; 
  44 }; # read stdin to stdout lines 
  46     # Desc: Checks if arg is integer 
  49     # Output: - return code 0 (if arg is integer) 
  50     #         - return code 1 (if arg is not integer) 
  51     # Example: if ! checkInt $arg; then echo "not int"; fi; 
  56     if [[ $# -ne 1 ]]; then 
  57         die 
"ERROR:Invalid number of arguments:$#"; 
  60     RETEST1
='^[0-9]+$'; # Regular Expression to test 
  61     if [[ ! $1 =~ 
$RETEST1 ]] ; then 
  67     #===Determine function return code=== 
  68     if [ "$returnState" = "true" ]; then 
  73 } # Checks if arg is integer 
  75     # Desc: Outputs and destroys line from list_input starting at index ip 
  76     # Usage: consume_line; 
  77     # Input: var: list_input  array input lines 
  78     #        var: lco         int   line count original 
  79     #        var: lcr         int   line count remaining 
  80     #        var: ip          int   list_input index pointer 
  81     # Output: stdout:    a single non-blank element from list_input 
  82     #         list_input     one element destroyed 
  83     #         var: lc_out    incremented once 
  86     n
=0; # for tracking progress iterating through remaining list_input 
  87     ### Loop once through list_input indices until a non-blank line found 
  88     while [[ $n -le $lco ]]; do 
  89         #### check if line at ip is blank 
  90         line
="${list_input[$ip]}"; 
  91         if [[ -n $line ]]; then 
  92             ##### consume line at index ip 
  93             printf "%s\n" "$line"; # print to output 
  95             unset "list_input[$ip]"; # destroy line in list_input array 
  97             ((lcr--
)); # decrement line count remaining lcr 
  98             #yell "DEBUG:Consumed line ip:$ip:$line"; 
 101         #### increment input index pointer, looping around if necessary 
 102         if [[ $ip -le $lco ]]; then ((ip
++)); else ip
=0; fi; 
 103         #### track progress through list_input 
 106 }; # consume and output line in list_input array starting at index ip 
 108     # Desc: Decides whether to read another line in list_input array 
 109     #   by comparing $RANDOM to p_seq_int 
 110     # Usage:  if decide_read; then something; fi; 
 111     # Input: var: p_seq_int probability (int [0 32767]) 
 113     if [[ $RANDOM -lt $p_seq_int ]]; then 
 118 }; # returns 0 with probability p_seq; 1 otherwise 
 121     declare par_l0 par_s0 s_exp s
; 
 122     declare -a list_input
; 
 124     # Check positional arguments 
 125     if [[ $# -gt 0 ]] && ! checkInt 
"$1"; then 
 126         die 
"FATAL:Not an integer:$1"; 
 128         lc_out_max
="$1"; # output line count 
 132     if ! checkInt 
"$BKSHUF_PARAM_LINEC"; then 
 133         die 
"FATAL:Not an int:BKSHUF_PARAM_LINEC:$BKSHUF_PARAM_LINEC"; fi; 
 134     if ! checkInt 
"$BKSHUF_PARAM_GSIZE"; then 
 135         die 
"FATAL:Not an int:BKSHUF_PARAM_LINEC:$BKSHUF_PARAM_GSIZE"; fi; 
 137     # store input lines from stdin (like `shuf`) 
 138     while read -r line
; do 
 139         if [[ -z $line ]]; then continue; fi; # skip blank lines 
 140         #yell "DEBUG:INPUT:$line"; 
 141         list_input
+=("$line"); 
 142     done < <( read_stdin
; ); 
 144     # calc line count (lc) 
 145     lc
="${#list_input[@]}"; 
 146     #lc="$(printf "%s\n" "${list_input[@]}" | wc -l )"; 
 147     #yell "DEBUG:lc:$lc"; 
 148     #yell "DEBUG:list_input:$(declare -p list_input)"; 
 150     # calculate group size s 
 151     par_l0
="$BKSHUF_PARAM_LINEC"; 
 152     par_s0
="$BKSHUF_PARAM_GSIZE"; 
 153     s_exp
="(( $par_s0 - 1 )/( ( l( $par_l0 ) )^2 ))*(l( $lc ))^2+1"; 
 154     s
="$(echo "scale
=12; $s_exp" | bc -l)"; 
 156     # calculate probabilities p_jump, p_seq 
 157     p_jump
="$(echo "scale
=12; 1 / ( $s )" | bc -l)"; 
 158     p_seq
="$(echo "scale
=12; 1 - $p_jump" | bc -l)"; 
 159     p_seq_int
="$(echo "scale
=0; ($p_seq * 32767)/1" | bc -l)"; # p_seq as int [0 32767] for $RANDOM range 
 162     lco
="${#list_input[@]}"; # save original input line count 
 164     lc_out
="0"; # init output line counter 
 165     if [[ -z "$lc_out_max" ]]; then lc_out_max
="$lco"; fi;     
 166     ip
="$(shuf -i0-$(( lco - 1 )) -n1)"; # init input index pointer 
 167     RANDOM
="$(shuf -i0-32767 -n1)"; # init Bash PRNG 
 169     #yell "DEBUG:max_blanks:$max_blanks" 
 170     while [[ $lcr -ge 1 ]] && [[ $lc_out -lt $lc_out_max ]]; do 
 171         ## Select random unconsumed input line and consume it to output 
 172         ip
="$(shuf -i0-$(( lco - 1 )) -n1)"; # init input index pointer 
 175         ## Consume the next sequential line with probability p_seq. 
 176         while decide_read 
&& [[ $lcr -ge 1 ]] && [[ $lc_out -lt $lc_out_max ]]; do 
 180         ((n_loop1
++)); # increment jump counter 
 182     #yell "DEBUG:n_loop1:$n_loop1"; # count jumps 
 188 # Author: Steven Baltakatei Sandoval 
 191 # Dependency information 
 194 # Copyright 1991-1994, 1997, 1998, 2000, 2004, 2006, 2008, 2012-2017 Free Software Foundation, Inc. 
 197 # shuf (GNU coreutils) 8.32 
 198 # Copyright (C) 2020 Free Software Foundation, Inc. 
 199 # License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>. 
 200 # This is free software: you are free to change and redistribute it. 
 201 # There is NO WARRANTY, to the extent permitted by law. 
 203 # Written by Paul Eggert.