#!/usr/bin/env bash

# Note: GNU Parallel obviates the need for this script. The main
# motivation for writing this script was to manage the number of CPU
# threads spawned when a list of files is fed to a while loop.

# Functions
# yell: write the script path ($0), a colon and all arguments (as one
# string) to stderr.
yell() {
    printf '%s\n' "$0: $*" >&2;
}
# die: report all arguments through yell(), then terminate the shell
# with exit status 111.
die() {
    yell "$*";
    exit 111;
}
# try: run the arguments as a command; if it fails, hand "cannot <args>"
# to die().
try() {
    if ! "$@"; then
        die "cannot $*";
    fi;
}
count_jobs() {
    # Desc: Print the number of lines that `jobs -r` reports for this shell
    # Usage: count_jobs
    # Input: None.
    # Output: stdout   integer job count ("0" if the pipeline yields nothing)
    # Depends: Bash 5.1.16, wc, tr
    # Example: while [[ $(count_jobs) -gt 0 ]]; do echo "Working..."; sleep 1; done;
    # Version: 0.0.1

    local running;
    # Count the lines of `jobs -r`; tr deletes any spaces in wc's output.
    running="$(jobs -r | wc -l | tr -d ' ')";
    #yell "DEBUG:running:$running";
    printf '%s\n' "${running:-0}";
}; # Return number of background jobs
test_job() {
    # Desc: Placeholder workload: read one file, then sleep a random time
    # Usage: test_job [path]
    # Input: arg1   path to read; skipped unless it is a regular file
    # Output: none (file contents and any cat error text are discarded)
    # Depends: cat, shuf, sleep
    #yell "DEBUG:starting test_job() with:$1";

    # Do work on $1 file
    if [[ -f $1 ]]; then
        # Discard into /dev/null, not /dev/random: the goal is only to
        # read the file, not to write its bytes into the random device.
        # `--` keeps a path beginning with "-" from being parsed as an option.
        cat -- "$1" 1>/dev/null 2>&1; # read file
    fi;
    sleep "$(shuf -i1-10 -n1)"; # debug: sleep a random whole number of seconds in 1-10
}; # Test job
count_jobs_display_update() {
    # Desc: Print a job status line to stderr at most once per status second
    # Usage: count_jobs_display_update
    # Input: var  jobs_update_interval    length of one update period, seconds
    #        var  jobs_update_init_delay  offset of the status second within a period
    #        var  jobs_n                  number of jobs begun so far
    #        var  permit_update           "true" when a status line may be printed
    #        var  SECONDS                 Bash elapsed-seconds counter
    # Output: stderr  "STATUS:..." line (via yell)
    #         var     jobs_beg, jobs_run, jobs_end, permit_update (globals, written)
    # Depends: yell(), count_jobs()
    local period offset;
    period="${jobs_update_interval:-0}";
    offset="${jobs_update_init_delay:-0}";

    # A non-positive period would make the modulo below divide by zero.
    if (( period <= 0 )); then return 0; fi;

    # The offset is reduced modulo the period so that an offset >= period
    # still names a second that can actually occur.
    if (( SECONDS % period == offset % period )); then
        # Status second: print once, then latch so that further calls
        # during the same second stay silent.
        if [[ $permit_update == "true" ]]; then
            jobs_beg="$jobs_n";
            jobs_run="$(count_jobs)";
            jobs_end="$((jobs_beg - jobs_run))";
            yell "STATUS:$jobs_beg jobs begun. $jobs_end jobs ended. $jobs_run jobs running.";
            permit_update="false";
        fi;
    else
        # Re-arm on ANY other second. Re-arming on one exact second only
        # can be skipped by a slow poller, and is unreachable whenever
        # offset + 1 >= period.
        permit_update="true";
    fi;
}; # periodically display updates


# Test Code
path_target="/tmp"; # path to dir with files to run test_job() on

## Setup file list (one path per line; find's error messages are discarded)
list_paths="$(find "$path_target" -type f 2>/dev/null)";

## Perform test_job() on each file
jobs_max=100; # adjust me (e.g. "4" on 4-core CPU); max jobs running at once
jobs_update_interval=5; # seconds.
jobs_update_init_delay=1; # seconds.
permit_update="true"; # flag to make updates happen periodically
jobs_n=0; # loop counter
jobs_check_delay=0.001; # seconds between each job count check while running jobs >= jobs_max

# With a limit below 1 the throttle loop below could never be satisfied.
if (( jobs_max < 1 )); then die "jobs_max must be at least 1 (got $jobs_max)"; fi;

# IFS= keeps leading/trailing whitespace in paths; the `|| [[ -n ... ]]`
# clause still processes a final line that lacks a trailing newline.
while IFS= read -r line || [[ -n $line ]]; do
    jobs_n=$((jobs_n + 1));
    #yell "DEBUG:jobs_n:$jobs_n"

    ## Wait until job count falls below $jobs_max
    ## (-ge, not -gt: starting a job while jobs_max are already running
    ## would put jobs_max + 1 jobs in flight)
    while [[ "$(count_jobs)" -ge $jobs_max ]]; do
        #yell "DEBUG:sleeping since $(count_jobs) >= $jobs_max";
        sleep "$jobs_check_delay";
        count_jobs_display_update;
    done;

    ## Start new parallel job on file $line
    test_job "$line" 1>/dev/null 2>&1 &

    ## Get job status updates every $jobs_update_interval seconds
    count_jobs_display_update;

    #sleep 1; # debug
done < <(printf '%s' "$list_paths" | shuf); # no trailing newline, so an empty list yields zero iterations
yell "STATUS:All jobs ($jobs_n) started. $(count_jobs) jobs running.";

# Detect when no outstanding jobs
while [[ "$(count_jobs)" -gt 0 ]]; do
    sleep 1;
    count_jobs_display_update;
done;
yell "STATUS:No more jobs visible.";

# Author: Steven Baltakatei Sandoval
# License: GPLv3+