--- /dev/null
+#!/usr/bin/env bash
+# Desc: Mixes input lines while also preserving some neighbors
+# Usage: cat file | bkshuf arg1
+# Version 0.0.1
+# Depends: bc 1.07.1, GNU Coreutils 8.32 (shuf)
+# Input: stdin: lines to mix (blank lines are skipped)
+#        arg1:  (optional) maximum number of lines to output; default: all input lines
+
+BKSHUF_PARAM_LINEC=1000000;
+BKSHUF_PARAM_GSIZE=25 # lines per group for BKSHUF_PARAM_LINEC lines of input
+
+
+yell() { echo "$0: $*" >&2; } # print script path and all args to stderr
+die() { yell "$*"; exit 111; } # same as yell() but non-zero exit status
+must() { "$@" || die "cannot $*"; } # runs args as command, reports args if command fails
+read_stdin() {
+    # Desc: Consumes stdin; outputs as stdout lines
+    # Input: stdin (consumes); may be a pipe or a redirected file
+    # Output: stdout (newline delimited); nothing if stdin is a
+    #         terminal or the input is empty
+    # Example: printf "foo\nbar\n" | read_stdin
+    # Depends: GNU bash (version 5.1.16)
+    # Version: 0.0.1
+    local input_stdin;
+
+    # Do not block waiting on an interactive terminal. Anything else
+    # (pipe, redirected file, here-string) is read in full.
+    if [[ -t 0 ]]; then return 0; fi;
+
+    # Store stdin
+    ## Command substitution drops trailing newlines, so trailing blank
+    ## lines of the input are not reproduced.
+    input_stdin="$(cat -)";
+
+    # Print to stdout
+    ## Lines are passed through verbatim (leading/trailing whitespace
+    ## kept). Nothing is printed for empty input, so callers never see
+    ## a spurious lone blank line.
+    if [[ -n $input_stdin ]]; then
+        printf "%s\n" "$input_stdin";
+    fi;
+}; # read stdin to stdout lines
+checkInt() {
+    # Desc: Checks if arg is a non-negative decimal integer
+    #       (one or more digits 0-9; no sign, no whitespace)
+    # Usage: checkInt arg
+    # Input: arg: string to test
+    # Output: - return code 0 (if arg is integer)
+    #         - return code 1 (if arg is not integer)
+    #         - script exit via die() if not called with exactly one arg
+    # Example: if ! checkInt $arg; then echo "not int"; fi;
+    # Version: 0.0.1
+    local re_digits='^[0-9]+$'; # kept local so it does not leak to callers
+
+    #===Process Arg===
+    if [[ $# -ne 1 ]]; then
+        die "ERROR:Invalid number of arguments:$#";
+    fi;
+
+    #===Determine function return code===
+    # The status of the regex test is the function's return code.
+    [[ $1 =~ $re_digits ]];
+} # Checks if arg is integer
+consume_line() {
+ # Desc: Outputs and destroys line from list_input starting at index ip
+ # Usage: consume_line;
+ # Input: var: list_input array input lines
+ # var: lco int line count original
+ # var: lcr int line count remaining
+ # var: ip int list_input index pointer
+ # Output: stdout: a single non-blank element from list_input
+ # list_input one element destroyed
+ # var: lc_out incremented once
+ local n line;
+
+ n=0; # for tracking progress iterating through remaining list_input
+ ### Loop once through list_input indices until a non-blank line found
+ while [[ $n -le $lco ]]; do
+ #### check if line at ip is blank
+ line="${list_input[$ip]}";
+ if [[ -n $line ]]; then
+ ##### consume line at index ip
+ printf "%s\n" "$line" & # print to output
+ unset "list_input[$ip]"; # destroy line in list_input array
+ ((lc_out++));
+ ((lcr--)); # decrement line count remaining lcr
+ #yell "DEBUG:Consumed line ip:$ip:$line";
+ break;
+ fi;
+ #### increment input index pointer, looping around if necessary
+ if [[ $ip -le $lco ]]; then ((ip++)); else ip=0; fi;
+ #### track progress through list_input
+ ((n++));
+ done;
+}; # consume and output line in list_input array starting at index ip
+decide_read() {
+ # Desc: Decides whether to read another line in list_input array
+ # by comparing $RANDOM to p_seq_int
+ # Usage: if decide_read; then something; fi;
+ # Input: var: p_seq_int probability (int [0 32767])
+
+ if [[ $RANDOM -lt $p_seq_int ]]; then
+ return 0;
+ else
+ return 1;
+ fi;
+}; # returns 0 with probability p_seq; 1 otherwise
+
+main() {
+    # Desc: Reads lines from stdin and outputs them in mixed order:
+    #       jumps to a random input index, outputs the line found there,
+    #       then keeps outputting the following lines with probability
+    #       p_seq each before jumping again.
+    # Usage: main "$@"
+    # Input: arg1: (optional) maximum number of lines to output;
+    #              default: all input lines
+    #        stdin: input lines (blank or whitespace-only lines skipped)
+    #        var: BKSHUF_PARAM_LINEC, BKSHUF_PARAM_GSIZE
+    # Output: stdout: mixed lines
+    # Depends: bc, shuf, read_stdin(), checkInt(), consume_line(),
+    #          decide_read(), die()
+    # Note: The working variables below are local to main but are read
+    #       and modified by consume_line() and decide_read() through
+    #       bash dynamic scoping.
+    declare par_l0 par_s0 s_exp s line;
+    declare lc lco lcr lc_out lc_out_max ip n_loop1;
+    declare p_jump p_seq p_seq_int;
+    declare -a list_input;
+
+    # Check positional arguments
+    if [[ $# -gt 0 ]]; then
+        if ! checkInt "$1"; then
+            die "FATAL:Not an integer:$1";
+        fi;
+        # Force base 10 so an argument such as "08" is not rejected as
+        # invalid octal by the integer comparisons below.
+        lc_out_max="$(( 10#$1 ))"; # output line count
+    fi;
+
+    # store input lines from stdin (like `shuf`)
+    while IFS= read -r line; do
+        # skip blank lines (empty or whitespace-only); other lines are
+        # stored verbatim, including leading/trailing whitespace
+        if [[ -z ${line//[[:space:]]/} ]]; then continue; fi;
+        #yell "DEBUG:INPUT:$line";
+        list_input+=("$line");
+    done < <( read_stdin; );
+
+    # calc line count (lc)
+    lc="${#list_input[@]}";
+    #yell "DEBUG:lc:$lc";
+    #yell "DEBUG:list_input:$(declare -p list_input)";
+
+    # Nothing to output; also avoids asking shuf for the empty range 0..-1
+    if [[ $lc -lt 1 ]]; then return 0; fi;
+
+    # calculate group size s
+    ## s grows with ln(lc)^2 and equals par_s0 when lc equals par_l0;
+    ## s is 1 for a single input line.
+    par_l0="$BKSHUF_PARAM_LINEC";
+    par_s0="$BKSHUF_PARAM_GSIZE";
+    s_exp="(( $par_s0 - 1 )/( ( l( $par_l0 ) )^2 ))*(l( $lc ))^2+1";
+    s="$(echo "scale=12; $s_exp" | bc -l)";
+
+    # calculate probabilities p_jump, p_seq
+    p_jump="$(echo "scale=12; 1 / ( $s )" | bc -l)";
+    p_seq="$(echo "scale=12; 1 - $p_jump" | bc -l)";
+    p_seq_int="$(echo "scale=0; ($p_seq * 32767)/1" | bc -l)"; # p_seq as int [0 32767] for $RANDOM range
+    ## A missing or failing bc leaves p_seq_int empty or malformed
+    if ! checkInt "$p_seq_int"; then
+        die "FATAL:Could not calculate p_seq_int (bc failed?):$p_seq_int";
+    fi;
+
+    # generate output
+    lco="$lc"; # save original input line count
+    lcr="$lco"; # line count remaining
+    lc_out="0"; # init output line counter
+    if [[ -z "$lc_out_max" ]]; then lc_out_max="$lco"; fi;
+    n_loop1="0";
+    while [[ $lcr -ge 1 ]] && [[ $lc_out -lt $lc_out_max ]]; do
+        ## Select random unconsumed input line and consume it to output
+        ## (abort if shuf fails; otherwise ip would be empty and the
+        ## loop could never make progress)
+        ip="$(shuf -i0-$(( lco - 1 )) -n1)" || die "FATAL:shuf failed";
+        consume_line;
+
+        ## Consume the next sequential line with probability p_seq.
+        while decide_read && [[ $lcr -ge 1 ]] && [[ $lc_out -lt $lc_out_max ]]; do
+            consume_line;
+        done;
+
+        ((n_loop1++)); # increment jump counter
+    done;
+    #yell "DEBUG:n_loop1:$n_loop1"; # count jumps
+
+}; # main program
+
+main "$@";
+
+# Author: Steven Baltakatei Sandoval
+# License: GPLv3+
+
+# Dependency information
+
+# bc 1.07.1
+# Copyright 1991-1994, 1997, 1998, 2000, 2004, 2006, 2008, 2012-2017 Free Software Foundation, Inc.
+
+
+# shuf (GNU coreutils) 8.32
+# Copyright (C) 2020 Free Software Foundation, Inc.
+# License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>.
+# This is free software: you are free to change and redistribute it.
+# There is NO WARRANTY, to the extent permitted by law.
+
+# Written by Paul Eggert.
+
+