#!/usr/bin/env bash
# Desc: Mixes input lines while also preserving some neighbors
# Usage: cat file | bkshuf [arg1]
#        bkshuf [arg1] < file
# Version 0.2.0
# Depends: bc 1.07.1, GNU Coreutils 8.32 (shuf)
# Input: arg1: (optional) maximum number of lines to output;
#              defaults to the number of non-blank input lines
#        env:  BKSHUF_PARAM_LINEC  reference input line count (default 1000000)
#        env:  BKSHUF_PARAM_GSIZE  group size targeted at that line count (default 25)
# Output: stdout: input lines, reordered; blank lines are dropped

trap 'exit;' SIGPIPE; # exit early if stdout not being read

# Load env vars
## For these numbers of lines of input...
if [[ ! -v BKSHUF_PARAM_LINEC ]]; then BKSHUF_PARAM_LINEC=1000000; fi;
## ... target this group size.
if [[ ! -v BKSHUF_PARAM_GSIZE ]]; then BKSHUF_PARAM_GSIZE=25; fi;

yell() { echo "$0: $*" >&2; } # print script path and all args to stderr
die() { yell "$*"; exit 111; } # same as yell() but non-zero exit status
must() { "$@" || die "cannot $*"; } # runs args as command, reports args if command fails

read_stdin() {
    # Desc: Consumes stdin; outputs as stdout lines
    # Input: stdin (consumes); only read when stdin is a pipe or a
    #        regular file, so an interactive terminal never blocks
    # Output: stdout (newline delimited); nothing if no input was read.
    #         Leading/trailing whitespace within each line is preserved;
    #         trailing newlines of the whole input are dropped by the
    #         command substitution.
    # Example: printf "foo\nbar\n" | read_stdin
    # Depends: GNU bash (version 5.1.16)
    # Version: 0.0.2
    local input_stdin line;
    local -a output=();

    # Store stdin
    if [[ -p /dev/stdin || -f /dev/stdin ]]; then
        input_stdin="$(cat -)";
    fi;

    # Store as output array elements
    ## IFS= keeps indentation and trailing blanks of each line intact
    if [[ -n $input_stdin ]]; then
        while IFS= read -r line; do
            output+=("$line");
        done < <(printf "%s\n" "$input_stdin");
    fi;

    # Print to stdout
    if [[ ${#output[@]} -gt 0 ]]; then
        printf "%s\n" "${output[@]}";
    fi;
}; # read stdin to stdout lines

checkInt() {
    # Desc: Checks if arg is a non-negative integer (digits only)
    # Usage: checkInt arg
    # Input: arg: string to test
    # Output: - return code 0 (if arg is integer)
    #         - return code 1 (if arg is not integer)
    #         - exits via die() if not called with exactly one argument
    # Example: if ! checkInt $arg; then echo "not int"; fi;
    # Version: 0.0.2
    local re_uint='^[0-9]+$'; # one or more decimal digits, nothing else

    #===Process Arg===
    if [[ $# -ne 1 ]]; then
        die "ERROR:Invalid number of arguments:$#";
    fi;

    #===Determine function return code===
    if [[ $1 =~ $re_uint ]]; then
        return 0;
    else
        return 1;
    fi;
} # Checks if arg is integer

consume_line() {
    # Desc: Outputs and destroys line from list_input starting at index ip
    # Usage: consume_line;
    # Input: var: list_input   array  input lines
    #        var: lco          int    line count original
    #        var: lcr          int    line count remaining
    #        var: ip           int    list_input index pointer
    # Output: stdout: a single non-blank element from list_input
    #         list_input   one element destroyed
    #         var: lc_out  incremented once
    #         var: lcr     decremented once
    #         var: ip      left at the index of the consumed element
    local n line;

    if [[ $lco -lt 1 ]]; then return 0; fi; # nothing to scan; avoid modulo by zero

    n=0; # number of indices probed so far
    ### Probe each of the lco indices at most once, starting at ip
    while [[ $n -lt $lco ]]; do
        #### check if line at ip is blank (blank means already consumed)
        line="${list_input[$ip]}";
        if [[ -n $line ]]; then
            ##### consume line at index ip
            printf "%s\n" "$line";    # print to output
            unset "list_input[$ip]";  # destroy line in list_input array
            lc_out=$(( lc_out + 1 )); # increment output line counter
            lcr=$(( lcr - 1 ));       # decrement line count remaining
            #yell "DEBUG:Consumed line ip:$ip:$line";
            break;
        fi;

        #### advance pointer, wrapping from the last valid index (lco-1) to 0
        ip=$(( (ip + 1) % lco ));

        #### track progress through list_input
        n=$(( n + 1 ));
    done;
}; # consume and output line in list_input array starting at index ip

decide_read() {
    # Desc: Decides whether to read another line in list_input array
    #       by comparing $RANDOM to p_seq_int
    # Usage: if decide_read; then something; fi;
    # Input: var: p_seq_int  probability scaled to int [0 32767]
    # Output: return code 0 if $RANDOM < p_seq_int; 1 otherwise
    if [[ $RANDOM -lt $p_seq_int ]]; then
        return 0;
    else
        return 1;
    fi;
}; # returns 0 with probability p_seq; 1 otherwise

main() {
    # Desc: Reads lines from stdin and prints them in mixed order:
    #       repeatedly jumps to a random index, then keeps emitting
    #       the following unconsumed lines while decide_read succeeds.
    # Usage: main [max_output_lines]
    # Input: arg1: (optional) maximum number of lines to output
    #        env:  BKSHUF_PARAM_LINEC, BKSHUF_PARAM_GSIZE
    #        stdin: lines to mix (blank lines are skipped)
    # Output: stdout: mixed lines
    # Note: lco, lcr, lc_out, ip and p_seq_int are read/written by
    #       consume_line() and decide_read() through dynamic scoping.
    local par_l0 par_s0 s_exp s p_jump p_seq line n_loop1;
    local lco lcr lc_out lc_out_max ip p_seq_int;
    local re_lt2='^0*[01]$'; # matches the integers 0 and 1
    local re_zero='^0+$';    # matches the integer 0
    local -a list_input=();

    # Check positional arguments
    if [[ $# -gt 0 ]]; then
        if ! checkInt "$1"; then die "FATAL:Not an integer:$1"; fi;
        lc_out_max=$(( 10#$1 )); # force base 10 so "08" is not read as octal
    fi;

    # Check env vars
    if ! checkInt "$BKSHUF_PARAM_LINEC"; then
        die "FATAL:Not an int:BKSHUF_PARAM_LINEC:$BKSHUF_PARAM_LINEC";
    fi;
    if ! checkInt "$BKSHUF_PARAM_GSIZE"; then
        die "FATAL:Not an int:BKSHUF_PARAM_GSIZE:$BKSHUF_PARAM_GSIZE";
    fi;
    ## ln(LINEC) is a divisor below, so LINEC of 0 or 1 cannot be used
    if [[ $BKSHUF_PARAM_LINEC =~ $re_lt2 ]]; then
        die "FATAL:Must be 2 or greater:BKSHUF_PARAM_LINEC:$BKSHUF_PARAM_LINEC";
    fi;
    ## a group size of 0 can drive the computed group size s to zero
    if [[ $BKSHUF_PARAM_GSIZE =~ $re_zero ]]; then
        die "FATAL:Must be 1 or greater:BKSHUF_PARAM_GSIZE:$BKSHUF_PARAM_GSIZE";
    fi;

    # store input lines from stdin (like `shuf`)
    while IFS= read -r line; do
        ## skip blank (empty or space/tab only) lines; an empty array
        ## slot is how consume_line() recognizes a consumed line
        if [[ -z ${line//[[:blank:]]/} ]]; then continue; fi;
        #yell "DEBUG:INPUT:$line";
        list_input+=("$line");
    done < <( read_stdin; );

    # calc line count
    lco="${#list_input[@]}"; # original input line count
    #yell "DEBUG:lco:$lco";
    #yell "DEBUG:list_input:$(declare -p list_input)";

    # nothing to mix; return before asking bc for l(0) or shuf for 0..-1
    if [[ $lco -eq 0 ]]; then return 0; fi;

    # calculate group size s = (s0 - 1) * ( ln(lc) / ln(l0) )^2 + 1
    par_l0="$BKSHUF_PARAM_LINEC";
    par_s0="$BKSHUF_PARAM_GSIZE";
    s_exp="(( $par_s0 - 1 )/( ( l( $par_l0 ) )^2 ))*(l( $lco ))^2+1";
    s="$(echo "scale=12; $s_exp" | bc -l)" \
        || die "FATAL:bc failed to calculate group size";

    # calculate probabilities p_jump, p_seq
    p_jump="$(echo "scale=12; 1 / ( $s )" | bc -l)" \
        || die "FATAL:bc failed to calculate p_jump";
    p_seq="$(echo "scale=12; 1 - $p_jump" | bc -l)" \
        || die "FATAL:bc failed to calculate p_seq";
    ## p_seq as int [0 32767] for $RANDOM range
    p_seq_int="$(echo "scale=0; ($p_seq * 32767)/1" | bc -l)" \
        || die "FATAL:bc failed to calculate p_seq_int";

    # generate output
    lcr="$lco";  # line count remaining
    lc_out="0";  # init output line counter
    if [[ -z "$lc_out_max" ]]; then lc_out_max="$lco"; fi;
    RANDOM="$(shuf -i0-32767 -n1)"; # init Bash PRNG
    n_loop1="0"; # jump counter (debug aid)
    while [[ $lcr -ge 1 ]] && [[ $lc_out -lt $lc_out_max ]]; do
        ## Select random input index and consume the first unconsumed
        ## line found at or after it
        ip="$(shuf -i0-$(( lco - 1 )) -n1)" \
            || die "FATAL:shuf failed to select index"; # avoids looping forever on empty ip
        consume_line;

        ## Consume the next sequential line with probability p_seq.
        while decide_read && [[ $lcr -ge 1 ]] && [[ $lc_out -lt $lc_out_max ]]; do
            consume_line;
        done;
        n_loop1=$(( n_loop1 + 1 )); # increment jump counter
    done;
    #yell "DEBUG:n_loop1:$n_loop1"; # count jumps
    return 0;
}; # main program

main "$@";

# Author: Steven Baltakatei Sandoval
# License: GPLv3+

# Dependency information

# bc 1.07.1
# Copyright 1991-1994, 1997, 1998, 2000, 2004, 2006, 2008, 2012-2017 Free Software Foundation, Inc.

# shuf (GNU coreutils) 8.32
# Copyright (C) 2020 Free Software Foundation, Inc.
# License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>.
# This is free software: you are free to change and redistribute it.
# There is NO WARRANTY, to the extent permitted by law.
# Written by Paul Eggert.