#!/bin/bash
# Date: 2020-01-20T17:08Z
#
# Author: Steven Baltakatei Sandoval (baltakatei.com)
#
# License: This bash script, `bkfind`, is licensed under GPLv3 or
# later by Steven Baltakatei Sandoval:
#
#    `bkfind`, a duplicate file finder
#    Copyright (C) 2020  Steven Baltakatei Sandoval (baltakatei.com)
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    A copy of the GNU General Public License may be found at
#    <https://www.gnu.org/licenses/>.
#
# Description: This script searches a specified directory tree for
# files whose file name contains a specified string (matched
# case-insensitively and literally). It works as follows:
#
#   - Search the specified directory tree for files whose names
#     contain the specified file's name (any leading path is stripped).
#   - Hash every match with rhash.
#   - List groups of files sharing the same hash first (groups are
#     separated by a blank line), then list files with unique hashes.
#     Only the last $HASH_DISP_LENGTH characters of each hash are shown.
#
# Usage: bkfind [DIR] [FILE]
#
#   DIR   Directory tree to search. Defaults to the current directory.
#   FILE  File name (or path) whose base name must be contained in the
#         names of the files to list. Defaults to the empty string,
#         which matches every file.
#
# Exit status: 0 on success, 1 if DIR does not exist, 127 if rhash is
# not installed.
#
# Dependencies: find, rhash, sort, uniq, cut, basename, bash. See end
# of file.
#
# Tested on:
#
#   - GNU/Linux Debian 10
#
# Note: `set -e` is deliberately not used. find exits non-zero when it
# meets e.g. an unreadable subdirectory, yet the results gathered from
# the rest of the tree are still worth reporting.

#==Initialization==

# rhash option name (without the leading `--`) selecting the hash function.
RHASH_HASH_TYPE="sha512"

# Number of trailing hash characters shown in the report.
HASH_DISP_LENGTH=16

# Number of characters returned by the chosen hash function
# (ex: `rhash --sha512 {}` produces 512/4=128 hexadecimal chars).
# Must be kept in sync with RHASH_HASH_TYPE.
HASH_CHAR_LENGTH=$(( 512 / 4 ))

# Character position before which characters are dropped from each
# line with `cut`, leaving HASH_DISP_LENGTH hash characters.
CUT_POSITION=$(( 1 + HASH_CHAR_LENGTH - HASH_DISP_LENGTH ))

readonly RHASH_HASH_TYPE HASH_DISP_LENGTH HASH_CHAR_LENGTH CUT_POSITION

#==Functions==

#######################################
# Escape glob metacharacters so a string is matched literally by
# find's -name/-iname tests.
# Arguments: $1 - string to escape
# Outputs:   escaped string on stdout (no trailing newline)
#######################################
bkfind_escape_glob() {
  local text=$1
  # Backslash must be handled first so the escapes added below are not
  # themselves escaped again.
  text=${text//\\/\\\\}
  text=${text//\*/\\*}
  text=${text//\?/\\?}
  text=${text//\[/\\[}
  text=${text//\]/\\]}
  printf '%s' "$text"
}

#######################################
# Hash every regular file under a directory whose name contains a
# given string (case-insensitive, literal match).
# Globals:   RHASH_HASH_TYPE (read)
# Arguments: $1 - directory tree to search
#            $2 - string the file names must contain
# Outputs:   rhash output on stdout, one "<hash>  <path>" line per file
# Returns:   exit status of find
#######################################
bkfind_hash_files() {
  local dir=$1
  local name_glob
  name_glob="*$(bkfind_escape_glob "$2")*"
  find "$dir" -type f -iname "$name_glob" \
    -exec rhash --"$RHASH_HASH_TYPE" {} \;
}

#######################################
# Turn a list of "<hash>  <path>" lines into the bkfind report.
# Globals:   HASH_CHAR_LENGTH, CUT_POSITION (read)
# Arguments: none; the hash list is read from stdin
# Outputs:   on stdout, first the files sharing a hash (grouped, groups
#            separated by a blank line), then the files with a unique
#            hash; hashes are truncated to their trailing characters.
#            An empty section is printed as a single blank line.
#######################################
bkfind_report() {
  local sorted duplicates uniques

  # uniq only compares adjacent lines, so sort once and reuse the result.
  sorted=$(sort)

  # printf '%s\n' (not `echo -e`) so backslashes in paths stay literal.
  duplicates=$(
    printf '%s\n' "$sorted" \
      | uniq -D --check-chars="$HASH_CHAR_LENGTH" \
      | uniq --check-chars="$HASH_CHAR_LENGTH" --group \
      | cut --characters="$CUT_POSITION"-
  )
  uniques=$(
    printf '%s\n' "$sorted" \
      | uniq --unique --check-chars="$HASH_CHAR_LENGTH" \
      | cut --characters="$CUT_POSITION"-
  )

  printf '%s\n' "$duplicates"
  printf '%s\n' "$uniques"
}

#######################################
# Entry point: validate the environment, then hash and report.
# Arguments: $1 - directory to search (default: current directory)
#            $2 - file whose base name is searched for (default: "")
# Outputs:   report on stdout, diagnostics on stderr
# Returns:   0 on success, 1 if the directory does not exist,
#            127 if rhash is not installed
#######################################
bkfind_main() {
  local dir=${1:-.}
  local file=${2:-}
  local base

  if ! command -v rhash >/dev/null 2>&1; then
    printf 'bkfind: required command not found: rhash\n' >&2
    return 127
  fi

  if [[ ! -e "$dir" ]]; then
    printf 'bkfind: no such file or directory: %s\n' "$dir" >&2
    return 1
  fi

  # Keep find from parsing a start point such as "-dir" as an expression.
  if [[ "$dir" == -* ]]; then
    dir=./$dir
  fi

  # Strip path information from the provided file name.
  base=$(basename -- "$file")

  #==Main Program==
  bkfind_hash_files "$dir" "$base" | bkfind_report
}

bkfind_main "$@"

# Dependencies:
#
# - find (GNU findutils) 4.6.0.225-235f
#   Copyright (C) 2019 Free Software Foundation, Inc.
#   License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>.
#   Written by Eric B. Decker, James Youngman, and Kevin Dalley.
#
# - RHash v1.3.8
#   License: RHash License
#
# - sort (GNU coreutils); invoked by this script but not recorded in
#   the original dependency list, so no tested version is given here.
#
# - uniq (GNU coreutils) 8.30
#   Copyright (C) 2018 Free Software Foundation, Inc.
#   License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>.
#   This is free software: you are free to change and redistribute it.
#   There is NO WARRANTY, to the extent permitted by law.
#   Written by Richard M. Stallman and David MacKenzie.
#
# - cut (GNU coreutils) 8.30
#   Copyright (C) 2018 Free Software Foundation, Inc.
#   License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>.
#   This is free software: you are free to change and redistribute it.
#   There is NO WARRANTY, to the extent permitted by law.
#   Written by David M. Ihnat, David MacKenzie, and Jim Meyering.
#
# - cat (GNU coreutils) 8.30 (listed by the original author; this
#   script does not call cat)
#   Copyright (C) 2018 Free Software Foundation, Inc.
#   License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>.
#   This is free software: you are free to change and redistribute it.
#   There is NO WARRANTY, to the extent permitted by law.
#   Written by Torbjorn Granlund and Richard M. Stallman.
#
# - GNU bash, version 5.0.3(1)-release (x86_64-pc-linux-gnu)
#   Copyright (C) 2019 Free Software Foundation, Inc.
#   License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
#   This is free software; you are free to change and redistribute it.
#   There is NO WARRANTY, to the extent permitted by law.