feature(unitproc) Add legacy unit process scripts
[BK-2020-03.git] / unitproc / bkhashwatch
diff --git a/unitproc/bkhashwatch b/unitproc/bkhashwatch
new file mode 100755 (executable)
index 0000000..63aeabc
--- /dev/null
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# Date: 2020-04-08T23:37Z
+
+# Description: A bash script that hashes files immediately after they
+# are modified. Watches for 1 hour then exits.
+
+# Dependencies: inotifywait (inotify-tools), awk, hostname, and GNU
+# coreutils (including timeout, b2sum, date, du)
+
+# Write all arguments to stderr as one line, joined by the first character
+# of IFS, which is a space by default. printf is used instead of echo so
+# that an argument such as -n or -e is printed literally rather than being
+# consumed as an echo option.
+echoerr() { printf '%s\n' "$*" 1>&2; }
+
+DIR_TO_WATCH="/home/baltakatei/Sync"
+DIR_LOG="/home/baltakatei/Sync/kodawkuori-07/2020/archive-PERS/logs/files"
+DIGEST_ALGO="b2sum"
+DIR_LOG_PATH=$DIR_LOG"/""$(date +%Y%m%d)""..""$(hostname)""_""$DIGEST_ALGO""_filewrites.log" # The .log extension is important for inotifywait "--exclude" option.
+TIMEOUT="1h" # Limit inotifywait process to 1 hour.
+MIN_FILE_SIZE=1 # smallest file size to log (in bytes)
+
+if ! command -v "$DIGEST_ALGO" 1>/dev/null 2>/dev/null; then echoerr "ERROR: $0 could not find command $DIGEST_ALGO ."; exit 1; fi
+
+if [ ! -d "$DIR_TO_WATCH" ]; then echoerr "ERROR: $0 could not parse $DIR_TO_WATCH as directory."; exit 1; fi
+
+timeout $TIMEOUT inotifywait -m -e close_write -e moved_to --exclude ".tmp$" --exclude ".log$" -r --format "%w%f" "$DIR_TO_WATCH" |
+    while read line; do
+       # note: make sure to exclude the $DIR_LOG_PATH via ".log$" (or equivalent means) to avoid endless logging of log writes.
+       EVENT_DATE="$(date +%Y%m%dT%H%M%S.%N%z )"
+       TARGET_FILEPATH="$(echo -n $line )"
+       TARGET_FILENAME="$(basename "$TARGET_FILEPATH" )"
+       TARGET_FILESIZE="$(du -b "$TARGET_FILEPATH" | awk '{print $1}' )"
+       TARGET_FILEMTIME="$(date -r "$TARGET_FILEPATH" +%Y%m%dT%H%M%S.%N%z)"
+       TARGET_DIGEST="$(cat "$TARGET_FILEPATH" | $DIGEST_ALGO | awk '{print $1}' )"
+       EVENT_LOG_ENTRY="$EVENT_DATE","$TARGET_FILEMTIME","$DIGEST_ALGO","$TARGET_DIGEST","$TARGET_FILESIZE","$TARGET_FILEPATH"
+       echo "$EVENT_LOG_ENTRY" >> /dev/random # Mix written file's digest with system PRNG.
+       if [ -d "$DIR_LOG" ] && [ $(( "$TARGET_FILESIZE" - "$MIN_FILE_SIZE" )) -ge 0 ]; then
+           if [ ! -f "$DIR_LOG_PATH" ]; then
+               echo "EVENT_DATE,TARGET_FILEMTIME,DIGEST_ALGO,TARGET_DIGEST,TARGET_FILESIZE,TARGET_FILEPATH" >> "$DIR_LOG_PATH" # print field names in first row
+           fi
+           echo "$EVENT_LOG_ENTRY" >> "$DIR_LOG_PATH"; # Log written file's digest if TARGET_FILESIZE greater or equal to MIN_FILE_SIZE (in bytes).
+           #echo "$EVENT_LOG_ENTRY" >> /tmp/bkhashwatch.log #debug
+       fi
+    done
+echo "Timeout of $TIMEOUT elapsed. Exiting."
+exit 0