Merge branch 'feature/user-scripts/BK-2020-03' into develop
[BK-2020-03.git] / unitproc / bkhashwatch
1 #!/bin/bash
2
3 # Date: 2020-04-08T23:37Z
4
5 # Description: A bash script that hashes files immediately after they
6 # are modified. Watches for 1 hour then exits.
7
8 # Dependencies: inotifywait, timeout, awk, b2sum, date
9
# Print all arguments, joined by single spaces, as one line on stderr.
# printf is used instead of echo so that a leading argument such as "-n" or
# "-e" is printed literally rather than being consumed as an echo option.
echoerr() { printf '%s\n' "$*" >&2; }
11
# Configuration.
# Each setting may be overridden through the BKHASHWATCH_* environment
# variable named in its expansion; when that variable is unset or empty the
# original hard-coded default is used.
DIR_TO_WATCH="${BKHASHWATCH_DIR_TO_WATCH:-/home/baltakatei/Sync}" # directory tree to watch
DIR_LOG="${BKHASHWATCH_DIR_LOG:-/home/baltakatei/Sync/kodawkuori-07/2020/archive-PERS/logs/files}" # directory that receives the log file
DIGEST_ALGO="${BKHASHWATCH_DIGEST_ALGO:-b2sum}" # digest command; its first output field is taken as the digest
# Log file path: <DIR_LOG>/<YYYYmmdd>..<hostname>_<DIGEST_ALGO>_filewrites.log
# The .log extension is important for the inotifywait "--exclude" option.
DIR_LOG_PATH="${DIR_LOG}/$(date +%Y%m%d)..$(hostname)_${DIGEST_ALGO}_filewrites.log"
TIMEOUT="${BKHASHWATCH_TIMEOUT:-1h}" # Limit inotifywait process to 1 hour.
MIN_FILE_SIZE="${BKHASHWATCH_MIN_FILE_SIZE:-1}" # smallest file size to log (in bytes)
18
# Pre-flight checks.
# Verify every external command the watcher pipeline relies on before it is
# started, so that a missing tool is reported immediately instead of
# surfacing later as a silently failing pipeline.
for required_cmd in "$DIGEST_ALGO" inotifywait timeout awk du date; do
  if ! command -v "$required_cmd" >/dev/null 2>&1; then
    echoerr "ERROR: $0 could not find command $required_cmd ."
    exit 1
  fi
done

# The watch target must be an existing directory.
if [ ! -d "$DIR_TO_WATCH" ]; then
  echoerr "ERROR: $0 could not parse $DIR_TO_WATCH as directory."
  exit 1
fi
22
# Watch DIR_TO_WATCH recursively for at most TIMEOUT. For every file that is
# closed after writing, or moved into the tree, build a CSV row:
#   event time, file mtime, digest algorithm, digest, size in bytes, path
# The row is always written to /dev/random; it is appended to DIR_LOG_PATH
# only when DIR_LOG exists and the file is at least MIN_FILE_SIZE bytes.
#
# note: names ending in ".tmp" or ".log" are excluded. This must cover
# DIR_LOG_PATH to avoid endless logging of log writes. A single regex with
# escaped dots is used so that only real ".tmp"/".log" suffixes match (not
# e.g. "catalog"), and so the result does not depend on how inotifywait
# treats repeated --exclude options.
# NOTE(review): fields are joined with bare commas; a path containing a comma
# is not quoted or escaped in the log.
timeout "$TIMEOUT" inotifywait -m -e close_write -e moved_to \
  --exclude '\.(tmp|log)$' -r --format "%w%f" "$DIR_TO_WATCH" |
  while IFS= read -r TARGET_FILEPATH; do
    # IFS= and -r keep the reported path byte-for-byte (leading/trailing
    # blanks, repeated spaces and backslashes are preserved).

    # Skip directories moved into the tree and files that have already
    # disappeared; only regular files can be sized and hashed.
    [ -f "$TARGET_FILEPATH" ] || continue

    EVENT_DATE="$(date +%Y%m%dT%H%M%S.%N%z)"
    TARGET_FILESIZE="$(du -b -- "$TARGET_FILEPATH" | awk '{print $1}')"
    TARGET_FILEMTIME="$(date -r "$TARGET_FILEPATH" +%Y%m%dT%H%M%S.%N%z)"
    # Redirect the file into the digest tool: if the file cannot be opened
    # the tool is not run at all, so no digest of empty input is produced.
    TARGET_DIGEST="$("$DIGEST_ALGO" <"$TARGET_FILEPATH" | awk '{print $1}')"

    # The file vanished or was unreadable between the event and the reads
    # above; there is nothing meaningful to record.
    if [ -z "$TARGET_FILESIZE" ] || [ -z "$TARGET_DIGEST" ]; then
      continue
    fi

    EVENT_LOG_ENTRY="$EVENT_DATE,$TARGET_FILEMTIME,$DIGEST_ALGO,$TARGET_DIGEST,$TARGET_FILESIZE,$TARGET_FILEPATH"
    printf '%s\n' "$EVENT_LOG_ENTRY" >>/dev/random # Mix written file's digest with system PRNG.

    # Log written file's digest if TARGET_FILESIZE greater or equal to
    # MIN_FILE_SIZE (in bytes).
    if [ -d "$DIR_LOG" ] && [ "$TARGET_FILESIZE" -ge "$MIN_FILE_SIZE" ]; then
      if [ ! -f "$DIR_LOG_PATH" ]; then
        # print field names in first row
        printf '%s\n' "EVENT_DATE,TARGET_FILEMTIME,DIGEST_ALGO,TARGET_DIGEST,TARGET_FILESIZE,TARGET_FILEPATH" >>"$DIR_LOG_PATH"
      fi
      printf '%s\n' "$EVENT_LOG_ENTRY" >>"$DIR_LOG_PATH"
      #printf '%s\n' "$EVENT_LOG_ENTRY" >> /tmp/bkhashwatch.log #debug
    fi
  done
# PIPESTATUS must be read immediately after the pipeline; element 0 is the
# exit status of `timeout`, which is 124 when the time limit was reached.
watch_status="${PIPESTATUS[0]}"

if [ "$watch_status" -ne 124 ]; then
  # The watcher ended for some reason other than the timeout (for example
  # inotifywait failed to start); do not report this as a normal timeout.
  echoerr "ERROR: $0 watcher exited with status $watch_status before timeout of $TIMEOUT elapsed."
  exit 1
fi

echo "Timeout of $TIMEOUT elapsed. Exiting."
exit 0