3 # Date: 2020-01-20T17:08Z
5 # Author: Steven Baltakatei Sandoval (baltakatei.com)
7 # License: This bash script, `bkfind`, is licensed under GPLv3 or
8 # later by Steven Baltakatei Sandoval:
10 # `bkfind`, a duplicate file finder
11 # Copyright (C) 2020 Steven Baltakatei Sandoval (baltakatei.com)
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
18 # This program is distributed in the hope that it will be useful,
19 # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 # GNU General Public License for more details.
23 # A copy of the GNU General Public License may be found at
24 # <https://www.gnu.org/licenses/>.
# Description: This is a script that searches a specified directory
# for files with a file name containing a specified string. It works
# as follows:
# - Search the specified directory tree for files whose filenames
#   contain the specified file's filename.
# - List groups of files sharing the same hash first, then list the
#   files with unique hashes.
# Dependencies: find, rhash, sort, uniq, cut, cat, basename, bash. See
# end of file for version and license details.
38 # - GNU/Linux Debian 10
# Use input arguments to define internal script variables.
# Each assignment must sit on a single line: `NAME` on one line and `="..."`
# on the next is parsed by bash as two unrelated commands.
DIR1="${1:-}"  # Specified directory (root of the tree that is searched)
FILE1="${2:-}" # Specified file (its base name is the search string)
# Hash algorithm; handed to rhash as the long option `--<type>`.
RHASH_HASH_TYPE="sha512"
# The number of characters returned by the chosen hash function
# (ex: `rhash --sha512 {}` produces 512/4=128 hexadecimal chars).
# Arithmetic expansion replaces `let`, and the assignment is kept on one line
# so that bash treats it as an assignment rather than as two commands.
HASH_CHAR_LENGTH=$(( 512 / 4 ))
# Strip path information from provided file name.
# The whole command substitution stays on one line (split across lines it is
# not an assignment at all), and `--` stops a file name that starts with `-`
# from being read as a basename option.
FILEBASE1="$(basename -- "$FILE1")"
# Generate list of hashes and filepaths, save to $HASHLIST1

# Run rhash on every regular file below directory $1 whose name contains $2
# (case-insensitive), using hash algorithm $3; rhash's output lines (hash
# followed by file path) go to stdout.
# The directory is quoted so that a path containing whitespace or glob
# characters reaches `find` as one argument, and so that an empty value is
# reported as an error by `find` instead of silently searching the current
# directory.
# NOTE(review): $2 is embedded in an -iname pattern, so glob characters in the
# file name (`*`, `?`, `[`) act as wildcards rather than literals.
_bkfind_hash_matching_files() {
  find "$1" -type f -iname "*$2*" -exec rhash --"$3" {} \;
}

HASHLIST1="$(_bkfind_hash_matching_files "$DIR1" "$FILEBASE1" "$RHASH_HASH_TYPE")"
# Specify character position before which characters are dropped from each line with `cut`.
# Keeping only the last HASH_DISP_LENGTH characters of a HASH_CHAR_LENGTH-wide
# hash means starting at column 1 + HASH_CHAR_LENGTH - HASH_DISP_LENGTH.
# NOTE(review): HASH_DISP_LENGTH is not assigned in the visible part of this
# script. When it is unset or empty the expression falls back to
# HASH_CHAR_LENGTH, i.e. position 1 (nothing is cut), instead of failing with
# an arithmetic syntax error -- confirm the intended display length.
CUT_POSITION=$(( 1 + HASH_CHAR_LENGTH - ${HASH_DISP_LENGTH:-HASH_CHAR_LENGTH} ))
#====Files with duplicate hashes====

# Read lines on stdin, sort them, and print every line whose first $1
# characters (the hash) are shared with at least one other line.
_bkfind_duplicate_lines() {
  sort | uniq -D --check-chars="$1"
}

# Read already-sorted lines on stdin, separate runs that share the same first
# $1 characters with an empty line, and drop the characters before column $2.
_bkfind_group_and_trim() {
  uniq --check-chars="$1" --group | cut --characters="$2"-
}

# Generate sublist of duplicate entries, save to $DUPLICATES1
# printf '%s\n' passes the list through verbatim; `echo -e` would expand
# backslash sequences that occur in file paths. The compared width follows
# HASH_CHAR_LENGTH (128 when that variable is not set) instead of a literal.
DUPLICATES1="$(printf '%s\n' "$HASHLIST1" | _bkfind_duplicate_lines "${HASH_CHAR_LENGTH:-128}")"

# Format $DUPLICATES1 for readability by grouping, truncating hash; save to $DUPLICATES2
# An unset CUT_POSITION falls back to 1, which keeps each line whole.
DUPLICATES2="$(printf '%s\n' "$DUPLICATES1" | _bkfind_group_and_trim "${HASH_CHAR_LENGTH:-128}" "${CUT_POSITION:-1}")"
#====Files with unique hashes====

# Read lines on stdin, sort them, and print only the lines whose first $1
# characters (the hash) are not shared with any other line.
_bkfind_unique_lines() {
  sort | uniq --unique --check-chars="$1"
}

# Generate sublist of unique entries, save to $UNIQUES1
# printf '%s\n' passes the list through verbatim; `echo -e` would expand
# backslash sequences that occur in file paths. The compared width follows
# HASH_CHAR_LENGTH (128 when that variable is not set) instead of a literal.
UNIQUES1="$(printf '%s\n' "$HASHLIST1" | _bkfind_unique_lines "${HASH_CHAR_LENGTH:-128}")"

# Format $UNIQUES1 for readability by truncating hash; save to $UNIQUES2
# An unset CUT_POSITION falls back to 1, which keeps each line whole.
UNIQUES2="$(printf '%s\n' "$UNIQUES1" | cut --characters="${CUT_POSITION:-1}"-)"
# Print the grouped duplicates first, then the files with unique hashes, as
# the description at the top of this script states.
# printf '%s\n' prints each list verbatim; `echo -e` would expand backslash
# sequences that occur in file paths.
printf '%s\n' "$DUPLICATES2"
printf '%s\n' "$UNIQUES2"
88 # - find (GNU findutils) 4.6.0.225-235f
89 # Copyright (C) 2019 Free Software Foundation, Inc.
90 # License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>.
91 # Written by Eric B. Decker, James Youngman, and Kevin Dalley.
# - rhash
# License: RHash License <http://rhash.sourceforge.net/license.php>
96 # - uniq (GNU coreutils) 8.30
97 # Copyright (C) 2018 Free Software Foundation, Inc.
98 # License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>.
99 # This is free software: you are free to change and redistribute it.
100 # There is NO WARRANTY, to the extent permitted by law.
101 # Written by Richard M. Stallman and David MacKenzie.
103 # - cut (GNU coreutils) 8.30
104 # Copyright (C) 2018 Free Software Foundation, Inc.
105 # License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>.
106 # This is free software: you are free to change and redistribute it.
107 # There is NO WARRANTY, to the extent permitted by law.
108 # Written by David M. Ihnat, David MacKenzie, and Jim Meyering.
110 # - cat (GNU coreutils) 8.30
111 # Copyright (C) 2018 Free Software Foundation, Inc.
112 # License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>.
113 # This is free software: you are free to change and redistribute it.
114 # There is NO WARRANTY, to the extent permitted by law.
115 # Written by Torbjorn Granlund and Richard M. Stallman.
117 # - GNU bash, version 5.0.3(1)-release (x86_64-pc-linux-gnu)
118 # Copyright (C) 2019 Free Software Foundation, Inc.
119 # License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
120 # This is free software; you are free to change and redistribute it.
121 # There is NO WARRANTY, to the extent permitted by law.