#!/bin/bash

# Date: 2020-04-08T23:37Z

# Description: A bash script that hashes files immediately after they
# are modified. Watches for 1 hour then exits.

# Dependencies: inotifywait, timeout, awk, b2sum, date, du, hostname

# echoerr: print all arguments as a single line on stderr.
# Arguments: $@ - message words; joined with the first character of IFS
#                 (a space unless IFS has been changed).
# Outputs:   the joined message plus a newline on stderr; nothing on stdout.
# printf is used instead of echo so that a message beginning with an
# echo option (-n, -e, -E) is printed literally instead of being swallowed.
echoerr() { printf '%s\n' "$*" 1>&2; }

# ---- Configuration ----

# Command used to compute file digests; it is also embedded in the log
# file name and written to every log row.
DIGEST_ALGO="b2sum"

# Directory tree that is watched recursively for file writes.
DIR_TO_WATCH="/home/baltakatei/Sync"

# Directory that receives the log file.
DIR_LOG="/home/baltakatei/Sync/kodawkuori-07/2020/archive-PERS/logs/files"

# Full path of the log file (despite the DIR_ prefix this is a file path):
#   <DIR_LOG>/<YYYYmmdd>..<hostname>_<DIGEST_ALGO>_filewrites.log
# The date and hostname are evaluated once, when this line runs.
# The ".log" extension matters: the watcher excludes names ending in ".log"
# so that writes to the log itself do not trigger further log entries.
DIR_LOG_PATH="${DIR_LOG}/$(date +%Y%m%d)..$(hostname)_${DIGEST_ALGO}_filewrites.log"

# Smallest file size, in bytes, that gets recorded in the log file.
MIN_FILE_SIZE=1

# Duration handed to timeout(1); limits the inotifywait process to 1 hour.
TIMEOUT="1h"

# Pre-flight checks. Abort with exit status 1 and a message on stderr when
# a prerequisite is missing, instead of failing later inside the watch loop.

# Every external command the script relies on must be resolvable. Checking
# only the digest tool would let a missing inotifywait/timeout go unnoticed
# until the watch pipeline silently produces no events.
for required_cmd in "$DIGEST_ALGO" inotifywait timeout awk date du; do
    if ! command -v "$required_cmd" 1>/dev/null 2>/dev/null; then
        echoerr "ERROR: $0 could not find command $required_cmd ."
        exit 1
    fi
done
unset required_cmd

# The watch target must be an existing directory.
if [ ! -d "$DIR_TO_WATCH" ]; then
    echoerr "ERROR: $0 could not parse $DIR_TO_WATCH as directory."
    exit 1
fi

# Main watch loop.
#
# inotifywait runs in monitor mode (-m), recursively (-r), for at most
# $TIMEOUT, printing the full path ("%w%f") of every file that was closed
# after writing or moved into the watched tree. Each path is hashed and a
# CSV row is appended to $DIR_LOG_PATH:
#   EVENT_DATE,TARGET_FILEMTIME,DIGEST_ALGO,TARGET_DIGEST,TARGET_FILESIZE,TARGET_FILEPATH
#
# Notes:
# - A single --exclude regex is used. NOTE(review): inotifywait appears to
#   keep only one exclude pattern (the last given), so separate options for
#   ".tmp$" and ".log$" would not both apply — confirm against the installed
#   inotify-tools version. The dot is escaped so that names such as "blog"
#   or "catalog" are still processed.
# - Names ending in ".log" MUST stay excluded; otherwise every append to
#   $DIR_LOG_PATH would itself trigger a new log entry, endlessly.
# - Paths are read with "IFS= read -r" so backslashes and leading, trailing
#   or repeated whitespace survive. Paths containing a newline cannot be
#   represented by this line-based protocol.
# - TARGET_FILEPATH is written to the CSV unescaped; a path containing a
#   comma will add extra columns to its row.
timeout "$TIMEOUT" inotifywait -m -e close_write -e moved_to \
    --exclude '\.(tmp|log)$' -r --format "%w%f" "$DIR_TO_WATCH" |
    while IFS= read -r TARGET_FILEPATH; do
        EVENT_DATE="$(date +%Y%m%dT%H%M%S.%N%z)"

        # moved_to also fires for directories, and short-lived files may be
        # gone by the time the event is handled. Only regular files can be
        # hashed meaningfully, so skip everything else.
        [[ -f "$TARGET_FILEPATH" ]] || continue

        TARGET_FILESIZE="$(du -b -- "$TARGET_FILEPATH" | awk '{print $1}')"
        TARGET_FILEMTIME="$(date -r "$TARGET_FILEPATH" +%Y%m%dT%H%M%S.%N%z)"
        TARGET_DIGEST="$("$DIGEST_ALGO" < "$TARGET_FILEPATH" | awk '{print $1}')"

        # The file can still disappear between the check above and the reads;
        # skip the event rather than log an empty size or digest.
        [[ "$TARGET_FILESIZE" =~ ^[0-9]+$ && -n "$TARGET_DIGEST" ]] || continue

        EVENT_LOG_ENTRY="$EVENT_DATE,$TARGET_FILEMTIME,$DIGEST_ALGO,$TARGET_DIGEST,$TARGET_FILESIZE,$TARGET_FILEPATH"

        # Mix written file's digest with system PRNG (done for every hashed
        # file, regardless of MIN_FILE_SIZE).
        printf '%s\n' "$EVENT_LOG_ENTRY" >> /dev/random

        # Log written file's digest only if the log directory exists and
        # TARGET_FILESIZE is greater than or equal to MIN_FILE_SIZE (bytes).
        if [[ -d "$DIR_LOG" ]] && (( TARGET_FILESIZE >= MIN_FILE_SIZE )); then
            if [[ ! -f "$DIR_LOG_PATH" ]]; then
                # New log file: print field names in first row.
                echo "EVENT_DATE,TARGET_FILEMTIME,DIGEST_ALGO,TARGET_DIGEST,TARGET_FILESIZE,TARGET_FILEPATH" >> "$DIR_LOG_PATH"
            fi
            printf '%s\n' "$EVENT_LOG_ENTRY" >> "$DIR_LOG_PATH"
            #printf '%s\n' "$EVENT_LOG_ENTRY" >> /tmp/bkhashwatch.log #debug
        fi
    done

# Exit status of the first pipeline stage (timeout ... inotifywait); must be
# captured immediately, before any other command overwrites PIPESTATUS.
WATCH_STATUS="${PIPESTATUS[0]}"

# timeout(1) exits with 124 when it had to stop the command because the time
# limit was reached; that is the expected, successful way for this script to end.
if (( WATCH_STATUS == 124 )); then
    echo "Timeout of $TIMEOUT elapsed. Exiting."
    exit 0
fi

# Anything else means the watcher stopped early (e.g. inotifywait failed to
# start or could not set up its watches); report it instead of claiming a timeout.
echoerr "ERROR: $0: watcher exited with status $WATCH_STATUS before timeout of $TIMEOUT elapsed."
(( WATCH_STATUS != 0 )) || WATCH_STATUS=1
exit "$WATCH_STATUS"