X-Git-Url: https://zdv2.bktei.com/gitweb/BK-2020-03.git/blobdiff_plain/dd9f30387a6f3e81b56ea3e86bef174422b37c77..fdf917e1ee70b612202fe10fab2e73d0ea077017:/unitproc/bkfind diff --git a/unitproc/bkfind b/unitproc/bkfind new file mode 100755 index 0000000..bfba95d --- /dev/null +++ b/unitproc/bkfind @@ -0,0 +1,121 @@ +#!/bin/bash + +# Date: 2020-01-20T17:08Z +# +# Author: Steven Baltakatei Sandoval (baltakatei.com) +# +# License: This bash script, `bkfind`, is licensed under GPLv3 or +# later by Steven Baltakatei Sandoval: +# +# `bkfind`, a duplicate file finder +# Copyright (C) 2020 Steven Baltakatei Sandoval (baltakatei.com) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# A copy of the GNU General Public License may be found at +# https://www.gnu.org/licenses/. +# +# Description: This is a script that searches a specified directory +# for files with a file name containing a specified string. It works +# as follows: +# +# - Search specified directory tree for files that have filenames +# that contain the specified file's filename. List groups of files +# sharing the same hash first then list files with unique hashes. +# +# Dependencies: find, rhash, sort, uniq, cut, cat, basename, bash. See end of file +# +# Tested on: +# +# - GNU/Linux Debian 10 +# + + +#==Initialization== +# Use input arguments to define internal script variables. 
# Hash algorithm; handed to rhash as the option `--<type>`.
readonly RHASH_HASH_TYPE="sha512"
# Number of trailing hash characters kept in the printed listing.
readonly HASH_DISP_LENGTH=16
# Number of characters returned by the chosen hash function
# (ex: `rhash --sha512 {}` produces 512/4=128 hexadecimal chars).
readonly HASH_CHAR_LENGTH=$(( 512 / 4 ))

#######################################
# List files below a directory whose names contain the base name of a given
# file, grouped by content hash: groups of files sharing a hash come first,
# then the files whose hash occurs only once.
# Globals:
#   RHASH_HASH_TYPE, HASH_DISP_LENGTH, HASH_CHAR_LENGTH (read)
# Arguments:
#   $1 - directory to search. Defaults to `.`, which is where GNU find
#        starts when it is given no start point (the previous behaviour
#        of an empty, unquoted argument).
#   $2 - file name; only its base name is used. An empty name yields the
#        pattern `**`, i.e. every regular file is hashed.
# Outputs:
#   One line per file: the last HASH_DISP_LENGTH hash characters followed by
#   whatever rhash printed after the hash. Duplicate groups are separated by
#   blank lines, and a blank line separates the last duplicate group from the
#   unique entries. Nothing is printed for a section that has no entries.
#   Diagnostics go to stderr.
# Returns:
#   0 on success, 1 if rhash is not installed.
#######################################
bkfind() {
  local dir="${1:-.}"
  local file="${2:-}"
  local filebase hashlist sorted duplicates uniques cut_position

  # Fail once, up front, instead of letting find report the missing
  # command separately for every matched file.
  if ! command -v rhash >/dev/null; then
    printf 'bkfind: required command not found: rhash\n' >&2
    return 1
  fi

  # Strip path information from provided file name.
  filebase="$(basename -- "$file")"

  # Generate list of hashes and filepaths.
  # - "$dir" is quoted so directories containing spaces or glob characters
  #   reach find as a single argument.
  # - `{} +` hands many files to each rhash process instead of one per file.
  # - NOTE: $filebase is used as part of a find pattern, so glob characters
  #   in it (`*`, `?`, `[`) keep their pattern meaning.
  # - find's exit status is deliberately not checked: an unreadable
  #   subdirectory should not suppress the results that were collected.
  hashlist="$(find "$dir" -type f -iname "*${filebase}*" \
    -exec rhash --"$RHASH_HASH_TYPE" {} +)"

  # Nothing matched: print nothing rather than blank lines.
  if [[ -z "$hashlist" ]]; then
    return 0
  fi

  # Character position before which characters are dropped from each line
  # with `cut`, leaving the last HASH_DISP_LENGTH characters of the hash.
  cut_position=$(( 1 + HASH_CHAR_LENGTH - HASH_DISP_LENGTH ))

  # printf '%s\n' is used instead of `echo -e` so that backslash sequences
  # occurring in file paths are passed through unchanged.
  sorted="$(printf '%s\n' "$hashlist" | sort)"

  #====Files with duplicate hashes====
  # Keep only lines whose hash occurs more than once, separate the groups
  # with blank lines, then truncate the hash for readability.
  duplicates="$(printf '%s\n' "$sorted" \
    | uniq --all-repeated --check-chars="$HASH_CHAR_LENGTH" \
    | uniq --check-chars="$HASH_CHAR_LENGTH" --group \
    | cut --characters="${cut_position}-")"

  #====Files with unique hashes====
  # Keep only lines whose hash occurs exactly once, then truncate the hash.
  uniques="$(printf '%s\n' "$sorted" \
    | uniq --unique --check-chars="$HASH_CHAR_LENGTH" \
    | cut --characters="${cut_position}-")"

  # List results.
  if [[ -n "$duplicates" ]]; then
    printf '%s\n' "$duplicates"
    # Without this separator the unique entries would read as members of
    # the last duplicate group.
    if [[ -n "$uniques" ]]; then
      printf '\n'
    fi
  fi
  if [[ -n "$uniques" ]]; then
    printf '%s\n' "$uniques"
  fi
  return 0
}

#==Main Program==
bkfind "$@"

# Dependencies:
#
# - find (GNU findutils) 4.6.0.225-235f
#   Copyright (C) 2019 Free Software Foundation, Inc.
#   License GPLv3+: GNU GPL version 3 or later https://gnu.org/licenses/gpl.html.
#   Written by Eric B. Decker, James Youngman, and Kevin Dalley.
#
# - RHash v1.3.8
#   License: RHash License
#
# - uniq (GNU coreutils) 8.30
#   Copyright (C) 2018 Free Software Foundation, Inc.
#   License GPLv3+: GNU GPL version 3 or later https://gnu.org/licenses/gpl.html.
+# This is free software: you are free to change and redistribute it. +# There is NO WARRANTY, to the extent permitted by law. +# Written by Richard M. Stallman and David MacKenzie. +# +# - cut (GNU coreutils) 8.30 +# Copyright (C) 2018 Free Software Foundation, Inc. +# License GPLv3+: GNU GPL version 3 or later https://gnu.org/licenses/gpl.html. +# This is free software: you are free to change and redistribute it. +# There is NO WARRANTY, to the extent permitted by law. +# Written by David M. Ihnat, David MacKenzie, and Jim Meyering. +# +# - cat (GNU coreutils) 8.30 +# Copyright (C) 2018 Free Software Foundation, Inc. +# License GPLv3+: GNU GPL version 3 or later https://gnu.org/licenses/gpl.html. +# This is free software: you are free to change and redistribute it. +# There is NO WARRANTY, to the extent permitted by law. +# Written by Torbjorn Granlund and Richard M. Stallman. +# +# - GNU bash, version 5.0.3(1)-release (x86_64-pc-linux-gnu) +# Copyright (C) 2019 Free Software Foundation, Inc. +# License GPLv3+: GNU GPL version 3 or later http://gnu.org/licenses/gpl.html +# This is free software; you are free to change and redistribute it. +# There is NO WARRANTY, to the extent permitted by law.