#!/bin/bash

# Date: 2020-01-20T17:08Z
#
# Author: Steven Baltakatei Sandoval (baltakatei.com)
#
# License: This bash script, `bkfind`, is licensed under GPLv3 or
# later by Steven Baltakatei Sandoval:
#
#    `bkfind`, a duplicate file finder
#    Copyright (C) 2020  Steven Baltakatei Sandoval (baltakatei.com)
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    A copy of the GNU General Public License may be found at
#    <https://www.gnu.org/licenses/>.
#
# Usage: bkfind DIRECTORY FILE
#
# Description: This is a script that searches a specified directory
# for files with a file name containing a specified string. It works
# as follows:
#
# - Search the specified directory tree for files whose names contain
#   the specified file's name (case-insensitive, matched literally).
#   List groups of files sharing the same hash first (groups are
#   separated by a blank line), then list files with unique hashes.
#   Each output line is the trailing part of the hash followed by the
#   file path.
#
# Exit status: 0 on success (including "nothing found"), 1 if DIRECTORY
# is not a directory, 2 on a usage error, 127 if rhash is not installed.
#
# Dependencies: find, rhash, sort, uniq, cut, basename, bash. See end
# of file for the tool versions this script was originally tested with.
#
# Tested on:
#
# - GNU/Linux Debian 10
#


#==Initialization==
# Hash algorithm, passed to rhash as `--<name>`.
readonly RHASH_HASH_TYPE="sha512"
# Number of trailing hash characters shown in the output.
readonly HASH_DISP_LENGTH=16
# The number of characters returned by the chosen hash function
# (ex: `rhash --sha512 {}` produces 512/4=128 hexadecimal chars).
readonly HASH_CHAR_LENGTH=$(( 512 / 4 ))


#==Functions==
# Print a one-line usage summary to stderr.
bkfind_usage() {
  printf 'Usage: %s DIRECTORY FILE\n' "${0##*/}" >&2
}

# Write $1 to stdout with the glob metacharacters \ * ? [ backslash-escaped,
# so that find's -iname treats the text literally (a name such as
# `report[1].pdf` would otherwise be read as a bracket expression).
# A character loop is used instead of ${var//pat/rep} so the result does
# not depend on how a given bash version treats backslashes in the
# replacement string.
bkfind_escape_glob() {
  local text=$1
  local escaped=''
  local ch
  local i
  for (( i = 0; i < ${#text}; i++ )); do
    ch=${text:i:1}
    case "$ch" in
      '\' | '*' | '?' | '[') escaped+="\\$ch" ;;
      *) escaped+=$ch ;;
    esac
  done
  printf '%s' "$escaped"
}

# Entry point.
# Arguments: $1 - directory tree to search
#            $2 - file whose base name is searched for (path part is ignored);
#                 further arguments are ignored
# Outputs:   duplicate groups, then unique files, on stdout; diagnostics
#            on stderr
# Returns:   see "Exit status" in the file header
bkfind_main() {
  if (( $# < 2 )) || [[ -z "$1" || -z "$2" ]]; then
    bkfind_usage
    return 2
  fi

  local dir=$1
  local file=$2

  if [[ ! -d "$dir" ]]; then
    printf '%s: not a directory: %s\n' "${0##*/}" "$dir" >&2
    return 1
  fi
  if ! command -v rhash >/dev/null 2>&1; then
    printf '%s: required program not found: rhash\n' "${0##*/}" >&2
    return 127
  fi

  # find parses a starting point that begins with one of these characters
  # as part of its expression; anchoring it with ./ keeps it a path.
  case "$dir" in
    -* | '('* | ')'* | ','* | '!'*) dir="./$dir" ;;
  esac

  # Strip path information from the provided file name.
  local filebase
  filebase=$(basename -- "$file") || return 1

  local pattern
  pattern="*$(bkfind_escape_glob "$filebase")*"

  #==Main Program==
  # Generate the sorted list of "<hash>  <path>" lines. `{} +` hands many
  # files to each rhash invocation instead of forking once per file.
  # find's own exit status is deliberately not checked: unreadable
  # subdirectories are reported on stderr and the remaining results are
  # still listed. LC_ALL=C gives a byte-wise, locale-independent order.
  # NOTE(review): the processing below is line-based, so a path containing
  # a newline cannot be represented correctly.
  local hashlist
  hashlist=$(
    find "$dir" -type f -iname "$pattern" \
      -exec rhash --"$RHASH_HASH_TYPE" {} + \
      | LC_ALL=C sort
  )

  # Nothing matched: print nothing rather than empty lines.
  [[ -n "$hashlist" ]] || return 0

  # Character position before which characters are dropped from each
  # line with `cut`, leaving the last HASH_DISP_LENGTH hash characters.
  local cut_position=$(( 1 + HASH_CHAR_LENGTH - HASH_DISP_LENGTH ))

  #====Files with duplicate hashes====
  # Keep every line whose hash occurs more than once; groups of equal
  # hashes are separated by an empty line, then the hash is truncated.
  local duplicates
  duplicates=$(
    printf '%s\n' "$hashlist" \
      | uniq --all-repeated=separate --check-chars="$HASH_CHAR_LENGTH" \
      | cut --characters="${cut_position}-"
  )

  #====Files with unique hashes====
  # Keep only lines whose hash occurs exactly once, then truncate the hash.
  local uniques
  uniques=$(
    printf '%s\n' "$hashlist" \
      | uniq --unique --check-chars="$HASH_CHAR_LENGTH" \
      | cut --characters="${cut_position}-"
  )

  # List results. Empty sections are skipped, and a blank line keeps the
  # unique entries from reading as part of the last duplicate group.
  if [[ -n "$duplicates" ]]; then
    printf '%s\n' "$duplicates"
  fi
  if [[ -n "$duplicates" && -n "$uniques" ]]; then
    printf '\n'
  fi
  if [[ -n "$uniques" ]]; then
    printf '%s\n' "$uniques"
  fi
  return 0
}

# The status of this last command becomes the script's exit status.
bkfind_main "$@"

# Dependencies:
#
# - find (GNU findutils) 4.6.0.225-235f
#   Copyright (C) 2019 Free Software Foundation, Inc.
#   License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>.
#   Written by Eric B. Decker, James Youngman, and Kevin Dalley.
#
# - RHash v1.3.8
#   License: RHash License <http://rhash.sourceforge.net/license.php>
#
# - uniq (GNU coreutils) 8.30
#   Copyright (C) 2018 Free Software Foundation, Inc.
#   License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>.
#   This is free software: you are free to change and redistribute it.
#   There is NO WARRANTY, to the extent permitted by law.
#   Written by Richard M. Stallman and David MacKenzie.
#
# - cut (GNU coreutils) 8.30
#   Copyright (C) 2018 Free Software Foundation, Inc.
#   License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>.
#   This is free software: you are free to change and redistribute it.
#   There is NO WARRANTY, to the extent permitted by law.
#   Written by David M. Ihnat, David MacKenzie, and Jim Meyering.
#
# - cat (GNU coreutils) 8.30
#   Copyright (C) 2018 Free Software Foundation, Inc.
#   License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>.
#   This is free software: you are free to change and redistribute it.
#   There is NO WARRANTY, to the extent permitted by law.
#   Written by Torbjorn Granlund and Richard M. Stallman.
#
# - GNU bash, version 5.0.3(1)-release (x86_64-pc-linux-gnu)
#   Copyright (C) 2019 Free Software Foundation, Inc.
#   License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
#   This is free software; you are free to change and redistribute it.
#   There is NO WARRANTY, to the extent permitted by law.