X-Git-Url: https://zdv2.bktei.com/gitweb/BK-2020-03.git/blobdiff_plain/dd9f30387a6f3e81b56ea3e86bef174422b37c77..fdf917e1ee70b612202fe10fab2e73d0ea077017:/unitproc/bkfind?ds=inline
diff --git a/unitproc/bkfind b/unitproc/bkfind
new file mode 100755
index 0000000..bfba95d
--- /dev/null
+++ b/unitproc/bkfind
@@ -0,0 +1,121 @@
+#!/bin/bash
+
+# Date: 2020-01-20T17:08Z
+#
+# Author: Steven Baltakatei Sandoval (baltakatei.com)
+#
+# License: This bash script, `bkfind`, is licensed under GPLv3 or
+# later by Steven Baltakatei Sandoval:
+#
+# `bkfind`, a duplicate file finder
+# Copyright (C) 2020 Steven Baltakatei Sandoval (baltakatei.com)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# A copy of the GNU General Public License may be found at
+# <https://www.gnu.org/licenses/>.
+#
+# Description: This is a script that searches a specified directory
+# for files with a file name containing a specified string. It works
+# as follows:
+#
+# - Search specified directory tree for files that have filenames
+# that contain the specified file's filename. List groups of files
+# sharing the same hash first then list files with unique hashes.
+#
+# Dependencies: find, rhash, sort, uniq, cut, cat, basename, bash. See end of file.
+#
+# Tested on:
+#
+# - GNU/Linux Debian 10
+#
+
+
+#==Initialization==
+# Usage: bkfind DIR FILE
+# Use input arguments to define internal script variables.
+DIR1="${1:-.}" # Specified directory (find's implicit default, `.`, made explicit)
+FILE1="$2"     # Specified file; only its base name is used for matching
+RHASH_HASH_TYPE="sha512"
+HASH_DISP_LENGTH=16 # Number of trailing hash characters shown in the output
+# The number of characters returned by the chosen hash function
+# (ex: `rhash --sha512 {}` produces 512/4=128 hexadecimal chars)
+HASH_CHAR_LENGTH=$(( 512 / 4 ))
+
+# Strip path information from provided file name.
+FILEBASE1=$(basename -- "$FILE1")
+
+
+#==Main Program==
+# Generate list of sha512 hashes and filepaths, save to $HASHLIST1.
+# "$DIR1" is quoted so directories containing whitespace are searched intact.
+# NOTE: glob characters in $FILEBASE1 are interpreted by `-iname` as a pattern.
+HASHLIST1="$(find "$DIR1" -type f -iname "*$FILEBASE1*" -exec rhash --"$RHASH_HASH_TYPE" {} +)"
+
+# Specify character position before which characters are dropped from each line with `cut`.
+CUT_POSITION=$(( 1 + HASH_CHAR_LENGTH - HASH_DISP_LENGTH ))
+
+#====Files with duplicate hashes====
+# Generate sublist of duplicate entries, save to $DUPLICATES1.
+# `printf '%s\n'` is used instead of `echo -e` so backslashes in paths stay literal.
+DUPLICATES1="$(printf '%s\n' "$HASHLIST1" | sort | uniq -D --check-chars="$HASH_CHAR_LENGTH")"
+
+# Format $DUPLICATES1 for readability by grouping, truncating sha512 hash; save to $DUPLICATES2
+DUPLICATES2="$(printf '%s\n' "$DUPLICATES1" | uniq --check-chars="$HASH_CHAR_LENGTH" --group | cut --characters="$CUT_POSITION"-)"
+
+#====Files with unique hashes====
+# Generate sublist of unique entries, save to $UNIQUES1
+UNIQUES1="$(printf '%s\n' "$HASHLIST1" | sort | uniq --unique --check-chars="$HASH_CHAR_LENGTH")"
+
+# Format $UNIQUES1 for readability by truncating sha512 hash; save to $UNIQUES2
+UNIQUES2="$(printf '%s\n' "$UNIQUES1" | cut --characters="$CUT_POSITION"-)"
+
+
+# List results
+printf '%s\n' "$DUPLICATES2"
+printf '%s\n' "$UNIQUES2"
+
+# Dependencies:
+#
+# - find (GNU findutils) 4.6.0.225-235f
+# Copyright (C) 2019 Free Software Foundation, Inc.
+# License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>.
+# Written by Eric B. Decker, James Youngman, and Kevin Dalley.
+#
+# - RHash v1.3.8
+# License: RHash License
+#
+# - uniq (GNU coreutils) 8.30
+# Copyright (C) 2018 Free Software Foundation, Inc.
+# License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>.
+# This is free software: you are free to change and redistribute it.
+# There is NO WARRANTY, to the extent permitted by law.
+# Written by Richard M. Stallman and David MacKenzie.
+#
+# - cut (GNU coreutils) 8.30
+# Copyright (C) 2018 Free Software Foundation, Inc.
+# License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>.
+# This is free software: you are free to change and redistribute it.
+# There is NO WARRANTY, to the extent permitted by law.
+# Written by David M. Ihnat, David MacKenzie, and Jim Meyering.
+#
+# - cat (GNU coreutils) 8.30
+# Copyright (C) 2018 Free Software Foundation, Inc.
+# License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>.
+# This is free software: you are free to change and redistribute it.
+# There is NO WARRANTY, to the extent permitted by law.
+# Written by Torbjorn Granlund and Richard M. Stallman.
+#
+# - GNU bash, version 5.0.3(1)-release (x86_64-pc-linux-gnu)
+# Copyright (C) 2019 Free Software Foundation, Inc.
+# License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
+# This is free software; you are free to change and redistribute it.
+# There is NO WARRANTY, to the extent permitted by law.