#!/usr/bin/env bash
# Desc: Mixes input lines while also preserving some neighbors
# Usage: cat file | bkshuf arg1
# Version 0.1.2
# Depends: bc 1.07.1, GNU Coreutils 8.32 (shuf)
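# Input: arg1:  int  (optional) maximum number of lines to output;
#                    defaults to the number of non-blank input lines
#        stdin: newline-delimited lines to mix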

# Tunable parameters: taken from the environment when already set (even
# if set to an empty string); the defaults below apply only when unset.
## Reference input size: for this many lines of input...
: "${BKSHUF_PARAM_LINEC=1000000}";
##   ... target this group size.
: "${BKSHUF_PARAM_GSIZE=25}";


yell() {
    # Desc: Prints the script path ($0) followed by all args to stderr
    # Usage: yell arg1 [arg2 ...]
    # Output: stderr: "<script path>: <args joined by spaces>"
    echo "$0: $*" 1>&2;
}
die() {
    # Desc: Reports all args through yell, then exits the shell
    # Usage: die arg1 [arg2 ...]
    # Output: exit status 111 (never returns to the caller)
    yell "$*";
    exit 111;
}
must() {
    # Desc: Runs the args as a command; aborts via die if it fails
    # Usage: must cmd [arg ...]
    # Output: whatever the command outputs; on failure die is called
    #         with "cannot <args>"
    if ! "$@"; then
        die "cannot $*";
    fi;
}
read_stdin() {
    # Desc: Consumes stdin; outputs as stdout lines
    # Input: stdin (consumes); read only when it is a pipe or a regular
    #          file, so an interactive terminal is never waited on
    # Output: stdout (newline delimited); every input line verbatim,
    #           and nothing at all when there is no input
    # Example: printf "foo\nbar\n" | read_stdin
    #          read_stdin < file.txt
    # Depends: GNU bash (version 5.1.16)
    # Version: 0.0.2
    local -a output=();

    # Store stdin as array elements, one per line.
    ## mapfile keeps leading/trailing whitespace and backslashes intact
    ## and also keeps a final line that lacks a trailing newline.
    if [[ -p /dev/stdin || -f /dev/stdin ]]; then
        mapfile -t output;
    fi;

    # Print to stdout
    ## Skipped when empty: printf with no arguments after the format
    ## would still emit one blank line.
    if [[ ${#output[@]} -gt 0 ]]; then
        printf "%s\n" "${output[@]}";
    fi;
}; # read stdin to stdout lines
checkInt() {
    # Desc: Checks if arg is a non-negative integer (ASCII digits only)
    # Usage: checkInt arg
    # Input: arg: string to test
    # Output: - return code 0 (if arg is integer)
    #         - return code 1 (if arg is not integer; this includes the
    #           empty string, signs, decimals and embedded whitespace)
    #         - calls die if not given exactly one argument
    # Example: if ! checkInt $arg; then echo "not int"; fi;
    # Version: 0.0.2
    local re_digits='^[0-9]+$'; # kept local so it does not leak to callers

    #===Process Arg===
    if [[ $# -ne 1 ]]; then
        die "ERROR:Invalid number of arguments:$#";
    fi;

    #===Determine function return code===
    # The status of the regex match is the function's return code.
    [[ $1 =~ $re_digits ]];
} # Checks if arg is integer
consume_line() {
    # Desc: Outputs and destroys line from list_input starting at index ip
    # Usage: consume_line;
    # Input: var: list_input  array input lines
    #        var: lco         int   line count original
    #        var: lcr         int   line count remaining
    #        var: ip          int   list_input index pointer
    # Output: stdout:    a single non-blank element from list_input
    #                    (nothing if no non-blank element remains)
    #         list_input     one element destroyed
    #         var: lc_out    incremented once
    #         var: lcr       decremented once
    #         var: ip        left at the index of the consumed element
    local n line;

    # Nothing to scan; also protects the modulo below from dividing by 0
    if [[ $lco -lt 1 ]]; then return 0; fi;

    n=0; # for tracking progress iterating through remaining list_input
    ### Probe each of the lco slots at most once, beginning at ip
    while [[ $n -lt $lco ]]; do
        #### check if line at ip is blank (unset elements expand to blank)
        line="${list_input[$ip]}";
        if [[ -n $line ]]; then
            ##### consume line at index ip
            # Printed in the foreground so that lines reach stdout in
            # exactly the order in which they are consumed.
            printf "%s\n" "$line";
            unset "list_input[$ip]"; # destroy line in list_input array
            lc_out=$(( lc_out + 1 ));
            lcr=$(( lcr - 1 )); # decrement line count remaining lcr
            #yell "DEBUG:Consumed line ip:$ip:$line";
            break;
        fi;
        #### advance index pointer, wrapping from lco-1 back to 0 so that
        #### only valid indices [0, lco-1] are ever probed
        ip=$(( (ip + 1) % lco ));
        #### track progress through list_input
        n=$(( n + 1 ));
    done;
}; # consume and output line in list_input array starting at index ip
decide_read() {
    # Desc: Random yes/no decision on reading another line from the
    #   list_input array: draws $RANDOM and compares it to p_seq_int
    # Usage:  if decide_read; then something; fi;
    # Input: var: p_seq_int probability (int [0 32767])
    # Output: return code 0 when the draw is below p_seq_int; 1 otherwise

    [[ $RANDOM -lt $p_seq_int ]] && return 0;
    return 1;
}; # returns 0 with probability p_seq; 1 otherwise

main() {
    # Desc: Reads lines from stdin and writes them to stdout in mixed
    #   order: repeatedly jumps to a random input index, outputs the
    #   line found there, then keeps outputting following lines with
    #   probability p_seq before jumping again.
    # Usage: main [arg1]
    # Input: arg1: int (optional) maximum number of lines to output;
    #                  defaults to the number of non-blank input lines
    #        stdin (via read_stdin)
    #        var: BKSHUF_PARAM_LINEC  int  reference input line count
    #        var: BKSHUF_PARAM_GSIZE  int  group size targeted at that count
    # Output: stdout: consumed lines, one per line
    # Depends: bc, shuf, checkInt(), read_stdin(), consume_line(),
    #          decide_read(), die()
    # Note: consume_line and decide_read read and update the variables
    #   declared local here through Bash dynamic scoping.
    local par_l0 par_s0 s_exp s line;
    local lc lco lcr lc_out lc_out_max ip p_jump p_seq p_seq_int n_loop1;
    local -a list_input=();

    # Check positional arguments
    if [[ $# -gt 0 ]] && ! checkInt "$1"; then
        die "FATAL:Not an integer:$1";
    fi;
    lc_out_max="$1"; # output line count (blank when no argument given)

    # Check env vars
    if ! checkInt "$BKSHUF_PARAM_LINEC"; then
        die "FATAL:Not an int:BKSHUF_PARAM_LINEC:$BKSHUF_PARAM_LINEC"; fi;
    if ! checkInt "$BKSHUF_PARAM_GSIZE"; then
        die "FATAL:Not an int:BKSHUF_PARAM_GSIZE:$BKSHUF_PARAM_GSIZE"; fi;

    # store input lines from stdin (like `shuf`)
    ## `read` without IFS= trims surrounding whitespace, so lines made
    ## only of whitespace are treated as blank and skipped too.
    while read -r line; do
        if [[ -z $line ]]; then continue; fi; # skip blank lines
        #yell "DEBUG:INPUT:$line";
        list_input+=("$line");
    done < <( read_stdin; );

    # calc line count (lc)
    lc="${#list_input[@]}";
    #yell "DEBUG:lc:$lc";
    #yell "DEBUG:list_input:$(declare -p list_input)";

    # Nothing to output; also keeps bc from evaluating l(0) and shuf
    # from being handed the invalid range 0--1.
    if [[ $lc -eq 0 ]]; then return 0; fi;

    # calculate group size s
    ## s = (s0 - 1) / ln(l0)^2 * ln(lc)^2 + 1
    ## which gives s = 1 for lc = 1 and s = s0 for lc = l0.
    par_l0="$BKSHUF_PARAM_LINEC";
    par_s0="$BKSHUF_PARAM_GSIZE";
    s_exp="(( $par_s0 - 1 )/( ( l( $par_l0 ) )^2 ))*(l( $lc ))^2+1";
    s="$(echo "scale=12; $s_exp" | bc -l)";

    # calculate probabilities p_jump, p_seq
    p_jump="$(echo "scale=12; 1 / ( $s )" | bc -l)";
    p_seq="$(echo "scale=12; 1 - $p_jump" | bc -l)";
    p_seq_int="$(echo "scale=0; ($p_seq * 32767)/1" | bc -l)"; # p_seq as int [0 32767] for $RANDOM range

    # generate output
    lco="$lc"; # save original input line count
    lcr="$lco";
    lc_out="0"; # init output line counter
    if [[ -z "$lc_out_max" ]]; then lc_out_max="$lco"; fi;
    RANDOM="$(shuf -i0-32767 -n1)"; # init Bash PRNG
    n_loop1="0";
    while [[ $lcr -ge 1 ]] && [[ $lc_out -lt $lc_out_max ]]; do
        ## Select random unconsumed input line and consume it to output
        ip="$(shuf -i0-$(( lco - 1 )) -n1)"; # jump: new random index pointer
        consume_line;

        ## Consume the next sequential line with probability p_seq.
        while decide_read && [[ $lcr -ge 1 ]] && [[ $lc_out -lt $lc_out_max ]]; do
            consume_line;
        done;

        n_loop1=$(( n_loop1 + 1 )); # increment jump counter
    done;
    #yell "DEBUG:n_loop1:$n_loop1"; # count jumps

}; # main program

# Entry point: hand every command-line argument to main unchanged.
main "$@"

# Author: Steven Baltakatei Sandoval
# License: GPLv3+

# Dependency information

# bc 1.07.1
# Copyright 1991-1994, 1997, 1998, 2000, 2004, 2006, 2008, 2012-2017 Free Software Foundation, Inc.


# shuf (GNU coreutils) 8.32
# Copyright (C) 2020 Free Software Foundation, Inc.
# License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>.
# This is free software: you are free to change and redistribute it.
# There is NO WARRANTY, to the extent permitted by law.

# Written by Paul Eggert.