#!/usr/bin/env bash
# Desc: Lists directories immediately containing large amounts of data
# Usage: script.sh <int megabytes> <dir>   (both arguments are required)
# Example: A directory named `vacation_photos` located somewhere in $HOME/
#   containing more than 100MB of files (without checking subdirectories of
#   `vacation_photos`) could be found by running:
#   $ script.sh 100 "$HOME"
#   /home/johndoe/Pictures/unsorted/notjunk/vacation_photos
# Version: 0.1.0

yell() {
    # Writes one line to stderr: the script path ($0), a colon and a space,
    # then all arguments joined by single spaces.
    echo "$0: $*" 1>&2
}
die() {
    # Reports the arguments on stderr through yell(), then terminates the
    # shell with exit status 111.
    yell "$*"
    exit 111
}
must() {
    # Runs the arguments as a command. If the command fails, aborts through
    # die() with a message naming the command that could not be run.
    if ! "$@"; then
        die "cannot $*"
    fi
}
checkInt() {
    # Desc: Checks if arg is a non-negative decimal integer (digits only)
    # Usage: checkInt arg
    # Input: arg: string to test
    # Output: - return code 0 (if arg consists solely of the digits 0-9)
    #         - return code 1 (otherwise: empty string, sign, whitespace,
    #           decimal point, letters, ...)
    #         - exits via die() if not called with exactly one argument
    # Example: if ! checkInt "$arg"; then echo "not int"; fi;
    # Version: 0.0.2
    # The pattern is held in a local variable so that =~ treats it as a
    # regular expression and nothing leaks into the caller's scope.
    local re_uint='^[0-9]+$';

    #===Process Arg===
    if [[ $# -ne 1 ]]; then
        die "ERROR:Invalid number of arguments:$#";
    fi;

    #===Determine function return code===
    # The status of the match (0 = matched, 1 = not matched) is the result.
    [[ $1 =~ $re_uint ]];
} # Checks if arg is integer
check_depends() {
    # Desc: Verifies that required commands can be resolved by the shell
    # Usage: check_depends [command ...]
    # Input: args: command names to look up; when none are given, the
    #   external tools this script relies on (find, du, cut) are checked
    # Output: - return code 0 (every command was found)
    #         - return code 1 (at least one command is missing)
    #         Prints nothing.
    local cmd;
    local -a required=( "$@" );

    if [[ ${#required[@]} -eq 0 ]]; then
        required=( find du cut );
    fi;

    for cmd in "${required[@]}"; do
        # Discard the lookup output; only the exit status matters.
        if ! command -v "$cmd" >/dev/null 2>&1; then
            return 1;
        fi;
    done;

    return 0;
}; # returns 1 if missing dependencies
size_dir_contents() {
    # Desc: Sums the sizes in bytes of the regular files located directly
    #   inside a directory; subdirectories are not descended into.
    # Usage: size_dir_contents [path]
    # Input: arg1: path of the directory to measure
    # Output: stdout: total size in bytes (0 when no regular files are found)
    #         return code 0
    # Depends: GNU find (-maxdepth, -printf)
    local bytes re addend;

    bytes=0;
    re="^[0-9]+$";

    # find prints each file's byte size itself. This avoids forking one
    # `du` per file and keeps file names out of the stream, so a name that
    # contains a newline followed by digits cannot be mistaken for a size.
    while IFS= read -r addend; do
        if [[ $addend =~ $re ]]; then
            bytes="$((bytes + addend))";
        fi;
    done < <( find "$1" -maxdepth 1 -type f -printf '%s\n' );

    echo "$bytes" && return 0;
}; # return size of dir immediate contents
main() {
    # Desc: Prints the path of every directory at or below a root directory
    #   whose immediate regular files total more than a size threshold.
    # Usage: main megabytes dir
    # Input: arg1: threshold in megabytes (digits only; 1 MB = 1000000 bytes)
    #        arg2: root directory to search
    # Output: stdout: one matching directory path per line
    #         exits via die() on missing dependencies or invalid arguments
    #         return code 0 otherwise
    local size_l_mb size_l_b dir_in pathdir dir_size;

    # check dependencies
    if ! check_depends; then die "FATAL:Missing dependencies."; fi;

    # check args
    if ! checkInt "$1"; then
        die "FATAL:Not an int:$1";
    fi;
    size_l_mb="$1";
    # Force base 10: without the 10# prefix a leading zero makes the shell
    # parse the value as octal ("010" -> 8, "08" -> arithmetic error).
    size_l_b="$(( 10#$size_l_mb * 1000000 ))";

    if [[ ! -d "$2" ]]; then
        die "FATAL:Not a dir:$2";
    fi;
    dir_in="$2";

    # check each dir size contents
    # Paths are read NUL-delimited so any character in a name is preserved.
    while IFS= read -r -d '' pathdir; do
        # Skip entries that are no longer directories by the time they are read.
        if [[ ! -d "$pathdir" ]]; then continue; fi;
        dir_size="$(size_dir_contents "$pathdir")";
        if [[ $dir_size -gt "$size_l_b" ]]; then
            printf "%s\n" "$pathdir"; # output
        fi;
    done < <(find "$dir_in" -type d -print0);

    return 0;
};

# Script entry point: pass every command-line argument to main() unchanged.
main "$@"
