]>
zdv2.bktei.com Git - BK-2020-03.git/blob - unitproc/bkshuf
e41d725b9620c82b29b919120da0bbb4a0bef2bb
   2 # Desc: Mixes input lines while also preserving some neighbors 
   3 # Usage: cat file | bkshuf arg1 
   5 # Depends: bc 1.07.1, GNU Coreutils 8.32 (shuf) 
   6 # Input: var: arg1  initial lines to output 
   9 ## For these numbers of lines of input... 
  # Tuning parameters for the group-size calculation. Either may be preset in
  # the environment; a default is assigned only when the variable is unset
  # (a variable that is set, even to the empty string, is left untouched).
  ##   Reference number of input lines...
  if [[ ! -v BKSHUF_PARAM_LINEC ]]; then
      BKSHUF_PARAM_LINEC=1000000;
  fi;
  ##   ... at which this group size is targeted.
  if [[ ! -v BKSHUF_PARAM_GSIZE ]]; then
      BKSHUF_PARAM_GSIZE=25;
  fi;
  # yell: print the script path ($0) followed by all arguments to stderr.
  #   printf is used instead of echo so message text is never interpreted
  #   as echo options or escape sequences.
  yell() { printf '%s: %s\n' "$0" "$*" >&2; }
  # die: report the arguments via yell, then exit the shell with status 111.
  die() { yell "$*"; exit 111; }
  # must: run the arguments as a command; if it fails, die with "cannot <args>".
  must() { "$@" || die "cannot $*"; }
  19     # Desc: Consumes stdin; outputs as stdout lines 
  20     # Input: stdin (consumes) 
  21     # Output: stdout (newline delimited) 
  22     # Example: printf "foo\nbar\n" | read_stdin 
  23     # Depends: GNU bash (version 5.1.16) 
  25     local input_stdin output
; 
  28     if [[ -p /dev
/stdin 
]]; then 
  29         input_stdin
="$(cat -)"; 
  32     # Store as output array elements 
  34     if [[ -n $input_stdin ]]; then 
  35         while read -r line
; do 
  37         done < <(printf "%s\n" "$input_stdin"); 
  41     printf "%s\n" "${output[@]}"; 
  42 }; # read stdin to stdout lines 
  44     # Desc: Checks if arg is integer 
  47     # Output: - return code 0 (if arg is integer) 
  48     #         - return code 1 (if arg is not integer) 
  49     # Example: if ! checkInt $arg; then echo "not int"; fi; 
  54     if [[ $# -ne 1 ]]; then 
  55         die 
"ERROR:Invalid number of arguments:$#"; 
  58     RETEST1
='^[0-9]+$'; # Regular Expression to test 
  59     if [[ ! $1 =~ 
$RETEST1 ]] ; then 
  65     #===Determine function return code=== 
  66     if [ "$returnState" = "true" ]; then 
  71 } # Checks if arg is integer 
  73     # Desc: Outputs and destroys line from list_input starting at index ip 
  74     # Usage: consume_line; 
  75     # Input: var: list_input  array input lines 
  76     #        var: lco         int   line count original 
  77     #        var: lcr         int   line count remaining 
  78     #        var: ip          int   list_input index pointer 
  79     # Output: stdout:    a single non-blank element from list_input 
  80     #         list_input     one element destroyed 
  81     #         var: lc_out    incremented once 
  84     n
=0; # for tracking progress iterating through remaining list_input 
  85     ### Loop once through list_input indices until a non-blank line found 
  86     while [[ $n -le $lco ]]; do 
  87         #### check if line at ip is blank 
  88         line
="${list_input[$ip]}"; 
  89         if [[ -n $line ]]; then 
  90             ##### consume line at index ip 
  # Print the consumed line to stdout. Run in the foreground: printf is a
  # builtin, so backgrounding it would fork once per line and allow
  # consecutive lines to be written out of order.
  printf '%s\n' "$line";
  92             unset "list_input[$ip]"; # destroy line in list_input array 
  94             ((lcr--
)); # decrement line count remaining lcr 
  95             #yell "DEBUG:Consumed line ip:$ip:$line"; 
  98         #### increment input index pointer, looping around if necessary 
  # Advance the index pointer. Valid list_input indices are 0..lco-1, so
  # wrap back to 0 after the last one instead of stepping past the end.
  if (( ip < lco - 1 )); then
      ip=$(( ip + 1 ));
  else
      ip=0;
  fi;
 100         #### track progress through list_input 
 103 }; # consume and output line in list_input array starting at index ip 
 105     # Desc: Decides whether to read another line in list_input array 
 106     #   by comparing $RANDOM to p_seq_int 
 107     # Usage:  if decide_read; then something; fi; 
 108     # Input: var: p_seq_int probability (int [0 32767]) 
 110     if [[ $RANDOM -lt $p_seq_int ]]; then 
 115 }; # returns 0 with probability p_seq; 1 otherwise 
 118     declare par_l0 par_s0 s_exp s
; 
 119     declare -a list_input
; 
 121     # Check positional arguments 
 122     if [[ $# -gt 0 ]] && ! checkInt 
"$1"; then 
 123         die 
"FATAL:Not an integer:$1"; 
 125         lc_out_max
="$1"; # output line count 
 # Both tuning parameters must pass checkInt (digits only); otherwise abort,
 # naming the offending variable and showing its value.
 if ! checkInt "$BKSHUF_PARAM_LINEC"; then
     die "FATAL:Not an int:BKSHUF_PARAM_LINEC:$BKSHUF_PARAM_LINEC";
 fi;
 if ! checkInt "$BKSHUF_PARAM_GSIZE"; then
     die "FATAL:Not an int:BKSHUF_PARAM_GSIZE:$BKSHUF_PARAM_GSIZE";
 fi;
 134     # store input lines from stdin (like `shuf`) 
 # Append each non-blank line produced by read_stdin to list_input.
 # The loop is fed by process substitution rather than a pipe so it runs
 # in the current shell and the appended elements persist after the loop.
 while read -r line; do
     if [[ -z $line ]]; then continue; fi; # skip blank lines
     #yell "DEBUG:INPUT:$line";
     list_input+=("$line");
 done < <( read_stdin; );
 141     # calc line count (lc) 
 142     lc
="${#list_input[@]}"; 
 143     #lc="$(printf "%s\n" "${list_input[@]}" | wc -l )"; 
 144     #yell "DEBUG:lc:$lc"; 
 145     #yell "DEBUG:list_input:$(declare -p list_input)"; 
 147     # calculate group size s 
 148     par_l0
="$BKSHUF_PARAM_LINEC"; 
 149     par_s0
="$BKSHUF_PARAM_GSIZE"; 
 150     s_exp
="(( $par_s0 - 1 )/( ( l( $par_l0 ) )^2 ))*(l( $lc ))^2+1"; 
 151     s
="$(echo "scale
=12; $s_exp" | bc -l)"; 
 153     # calculate probabilities p_jump, p_seq 
 154     p_jump
="$(echo "scale
=12; 1 / ( $s )" | bc -l)"; 
 155     p_seq
="$(echo "scale
=12; 1 - $p_jump" | bc -l)"; 
 156     p_seq_int
="$(echo "scale
=0; ($p_seq * 32767)/1" | bc -l)"; # p_seq as int [0 32767] for $RANDOM range 
 159     lco
="${#list_input[@]}"; # save original input line count 
 161     lc_out
="0"; # init output line counter 
 162     if [[ -z "$lc_out_max" ]]; then lc_out_max
="$lco"; fi;     
 163     ip
="$(shuf -i0-$(( lco - 1 )) -n1)"; # init input index pointer 
 165     #yell "DEBUG:max_blanks:$max_blanks" 
 166     while [[ $lcr -ge 1 ]] && [[ $lc_out -lt $lc_out_max ]]; do 
 167         ## Select random unconsumed input line and consume it to output 
 168         ip
="$(shuf -i0-$(( lco - 1 )) -n1)"; # init input index pointer 
 171         ## Consume the next sequential line with probability p_seq. 
 172         while decide_read 
&& [[ $lcr -ge 1 ]] && [[ $lc_out -lt $lc_out_max ]]; do 
 176         ((n_loop1
++)); # increment jump counter 
 178     #yell "DEBUG:n_loop1:$n_loop1"; # count jumps 
 184 # Author: Steven Baltakatei Sandoval 
 187 # Dependency information 
 190 # Copyright 1991-1994, 1997, 1998, 2000, 2004, 2006, 2008, 2012-2017 Free Software Foundation, Inc. 
 193 # shuf (GNU coreutils) 8.32 
 194 # Copyright (C) 2020 Free Software Foundation, Inc. 
 195 # License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>. 
 196 # This is free software: you are free to change and redistribute it. 
 197 # There is NO WARRANTY, to the extent permitted by law. 
 199 # Written by Paul Eggert.