#!/usr/bin/env bash
# Desc: Run a job on every file below a directory while capping the
#   number of background jobs that run at the same time.
# Note: GNU Parallel obviates the need for this script. The main
#   motivation for writing this script was to manage the number of CPU
#   threads being spawned by a list of files being fed to a while loop.
# Depends: Bash 5.1.16, find, GNU coreutils (shuf -z, wc, tr, cat, sleep)

# Functions
yell() { printf '%s\n' "$0: $*" >&2; } # print script path and all args to stderr
die() { yell "$*"; exit 111; }         # same as yell() but non-zero exit status
try() { "$@" || die "cannot $*"; }     # runs args as command, reports args if command fails

count_jobs() {
  # Desc: Count and return total number of running background jobs
  # Usage: count_jobs
  # Input: None.
  # Output: stdout integer number of jobs
  # Depends: Bash 5.1.16
  # Example: while [[ $(count_jobs) -gt 0 ]]; do echo "Working..."; sleep 1; done;
  # Version: 0.0.2
  local job_count
  # `jobs -rp` prints exactly one PID per running job, so the line count
  # is the job count; `tr` strips the padding some `wc` builds emit.
  job_count="$(jobs -rp | wc -l | tr -d ' ')"
  #yell "DEBUG:job_count:$job_count";
  if [[ -z "$job_count" ]]; then
    job_count="0"
  fi
  printf '%s\n' "$job_count"
} # Return number of background jobs

test_job() {
  # Desc: Placeholder workload; reads the file and then idles
  # Usage: test_job path
  # Input: arg1: path of the file to work on
  # Output: none (file contents are discarded)
  #yell "DEBUG:starting test_job() with:$1";
  # Do work on $1 file
  if [[ -f "$1" ]]; then
    cat -- "$1" >/dev/null 2>&1 # read file; discard contents and errors
  fi
  sleep "$(shuf -i1-10 -n1)" # debug: pretend the job takes 1-10 seconds
} # Test job

count_jobs_display_update() {
  # Desc: Print a status line to stderr at most once per update window
  # Usage: count_jobs_display_update
  # Input: vars: SECONDS, jobs_n, jobs_update_interval,
  #              jobs_update_init_delay, permit_update
  # Output: stderr status line; var: permit_update
  # Note: An update is printed when SECONDS modulo jobs_update_interval
  #   equals jobs_update_init_delay. permit_update is cleared after a
  #   print and set again one second later so that repeated calls
  #   within the same second print only once.
  local jobs_beg jobs_run jobs_end
  if [[ $((SECONDS % jobs_update_interval)) -eq $jobs_update_init_delay ]] \
    && [[ $permit_update == "true" ]]; then
    jobs_beg="$jobs_n"
    jobs_run="$(count_jobs)"
    jobs_end="$((jobs_beg - jobs_run))"
    yell "STATUS:$jobs_beg jobs begun. $jobs_end jobs ended. $jobs_run jobs running."
    permit_update="false"
  fi
  if [[ $((SECONDS % jobs_update_interval)) -eq $((jobs_update_init_delay + 1)) ]]; then
    permit_update="true"
  fi
} # periodically display updates

# Test Code
path_target="/tmp" # path to dir with files to run test_job() on

## Settings
jobs_max=100             # adjust me (e.g. "4" on 4-core CPU)
jobs_update_interval=5   # seconds.
jobs_update_init_delay=1 # seconds.
permit_update="true"     # flag to make updates happen periodically
jobs_n=0                 # loop counter
jobs_check_delay=0.001   # seconds between each job count check if running jobs >= jobs_max

## Reject settings that would hang the throttle loop or divide by zero
if ! [[ $jobs_max =~ ^[0-9]+$ ]] || [[ $jobs_max -lt 1 ]]; then
  die "jobs_max must be a positive integer: $jobs_max"
fi
if ! [[ $jobs_update_interval =~ ^[0-9]+$ ]] || [[ $jobs_update_interval -lt 1 ]]; then
  die "jobs_update_interval must be a positive integer: $jobs_update_interval"
fi

## Perform test_job() on each file, in random order
## (NUL-delimited so that any legal file name survives intact;
##  find errors such as unreadable directories are deliberately ignored)
while IFS= read -r -d '' line; do
  jobs_n=$((jobs_n + 1))
  #yell "DEBUG:jobs_n:$jobs_n"

  ## Wait until job count falls below $jobs_max
  while [[ "$(count_jobs)" -ge $jobs_max ]]; do
    #yell "DEBUG:sleeping since $(count_jobs) >= $jobs_max";
    sleep "$jobs_check_delay"
    count_jobs_display_update
  done

  ## Start new parallel job on file $line
  test_job "$line" >/dev/null 2>&1 &

  ## Get job status updates every $jobs_update_interval seconds
  count_jobs_display_update
  #sleep 1; # debug
done < <(find "$path_target" -type f -print0 2>/dev/null | shuf -z)

yell "STATUS:All jobs ($jobs_n) started. $(count_jobs) jobs running."

# Detect when no outstanding jobs
while [[ "$(count_jobs)" -gt 0 ]]; do
  sleep 1
  count_jobs_display_update
done
yell "STATUS:No more jobs visible."

# Author: Steven Baltakatei Sandoval
# License: GPLv3+