| 1 | #!/bin/bash |
| 2 | |
| 3 | # Date: 2020-01-20T17:08Z |
| 4 | # |
| 5 | # Author: Steven Baltakatei Sandoval (baltakatei.com) |
| 6 | # |
| 7 | # License: This bash script, `bkfind`, is licensed under GPLv3 or |
| 8 | # later by Steven Baltakatei Sandoval: |
| 9 | # |
| 10 | # `bkfind`, a duplicate file finder |
| 11 | # Copyright (C) 2020 Steven Baltakatei Sandoval (baltakatei.com) |
| 12 | # |
| 13 | # This program is free software: you can redistribute it and/or modify |
| 14 | # it under the terms of the GNU General Public License as published by |
| 15 | # the Free Software Foundation, either version 3 of the License, or |
| 16 | # any later version. |
| 17 | # |
| 18 | # This program is distributed in the hope that it will be useful, |
| 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 21 | # GNU General Public License for more details. |
| 22 | # |
| 23 | # A copy of the GNU General Public License may be found at |
| 24 | # <https://www.gnu.org/licenses/>. |
| 25 | # |
| 26 | # Description: This is a script that searches a specified directory |
| 27 | # for files with a file name containing a specified string. It works |
| 28 | # as follows: |
| 29 | # |
| 30 | # - Search specified directory tree for files that have filenames |
| 31 | # that contain the specified file's filename. List groups of files |
| 32 | # sharing the same hash first then list files with unique hashes. |
| 33 | # |
| 34 | # Dependencies: find, rhash, sort, uniq, cut, cat, basename, bash. See end of file. |
| 35 | # |
| 36 | # Tested on: |
| 37 | # |
| 38 | # - GNU/Linux Debian 10 |
| 39 | # |
| 40 | |
| 41 | |
#==Initialization==
# Usage: bkfind DIR FILE
#
#   DIR   Directory tree to search.
#   FILE  File (or bare name) whose basename is matched, case-insensitively,
#         as a substring of candidate file names. Any leading path is
#         stripped. Glob metacharacters in the name are passed through to
#         find's -iname test unchanged.
#
# Output (stdout): first the files that share a hash with at least one
# other match (groups separated by a blank line), then the files whose
# hash is unique. Each line is the last HASH_DISP_LENGTH characters of the
# hash followed by the remainder of rhash's output line.
#
# Exit status: 0 on success, 1 if a required tool or DIR is missing,
# 2 on a usage error.
#
# NOTE(review): processing is line based, so file names containing newline
# characters are not supported.

# Hash settings. HASH_CHAR_LENGTH must equal the number of characters that
# `rhash --$RHASH_HASH_TYPE` prints per digest (sha512 -> 512/4 = 128 hex).
readonly RHASH_HASH_TYPE="sha512"
readonly HASH_DISP_LENGTH=16
readonly HASH_CHAR_LENGTH=$(( 512 / 4 ))

# First character position kept by `cut`; everything before it (the leading
# part of the hash) is dropped from each displayed line.
readonly CUT_POSITION=$(( 1 + HASH_CHAR_LENGTH - HASH_DISP_LENGTH ))


#==Functions==
# Print the report for a newline-separated list of rhash output lines.
# Arguments: $1 - hash list; assumes each line starts with a
#                 HASH_CHAR_LENGTH-character hash (TODO confirm against the
#                 installed rhash's default output format).
# Outputs:   duplicate groups, then unique entries, on stdout.
bkfind_report() {
  local sorted duplicates uniques

  # Sorting puts lines with equal hashes next to each other, which is what
  # uniq needs in order to compare them.
  sorted=$(printf '%s\n' "$1" | sort)

  #====Files with duplicate hashes====
  # Keep every line whose hash occurs more than once, separate the groups
  # with a blank line, then truncate the hash for readability.
  duplicates=$(
    printf '%s\n' "$sorted" \
      | uniq -D --check-chars="$HASH_CHAR_LENGTH" \
      | uniq --group --check-chars="$HASH_CHAR_LENGTH" \
      | cut --characters="${CUT_POSITION}-"
  )

  #====Files with unique hashes====
  # Keep only lines whose hash occurs exactly once, then truncate the hash.
  uniques=$(
    printf '%s\n' "$sorted" \
      | uniq --unique --check-chars="$HASH_CHAR_LENGTH" \
      | cut --characters="${CUT_POSITION}-"
  )

  # List results. printf '%s' is used instead of `echo -e` so backslashes
  # in file names are printed literally.
  printf '%s\n' "$duplicates"
  printf '%s\n' "$uniques"
}

# Entry point: validate input, hash the matching files, print the report.
# Arguments: $1 - directory to search, $2 - file whose basename is matched.
#            Additional arguments are ignored.
# Returns:   0 on success, 1 on a missing tool or DIR, 2 on a usage error.
bkfind_main() {
  if (( $# < 2 )); then
    printf 'Usage: bkfind DIR FILE\n' >&2
    return 2
  fi

  local dir=$1
  local name tool hashlist

  for tool in find rhash sort uniq cut basename; do
    if ! command -v "$tool" >/dev/null 2>&1; then
      printf 'bkfind: required command not found: %s\n' "$tool" >&2
      return 1
    fi
  done

  if [[ ! -e "$dir" ]]; then
    printf 'bkfind: no such file or directory: %s\n' "$dir" >&2
    return 1
  fi

  # A start path beginning with "-" would be parsed by find as an option.
  if [[ "$dir" == -* ]]; then
    dir="./$dir"
  fi

  # Strip path information from the provided file name.
  name=$(basename -- "$2")

  #==Main Program==
  # Generate the list of hashes and file paths. "$dir" is quoted so that
  # directories containing spaces or glob characters are searched as given.
  hashlist=$(
    find "$dir" -type f -iname "*${name}*" \
      -exec rhash --"$RHASH_HASH_TYPE" {} \;
  )

  bkfind_report "$hashlist"
}

# Last command of the script: its return value becomes the exit status.
bkfind_main "$@"
| 85 | |
| 86 | # Dependencies: |
| 87 | # |
| 88 | # - find (GNU findutils) 4.6.0.225-235f |
| 89 | # Copyright (C) 2019 Free Software Foundation, Inc. |
| 90 | # License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>. |
| 91 | # Written by Eric B. Decker, James Youngman, and Kevin Dalley. |
| 92 | # |
| 93 | # - RHash v1.3.8 |
| 94 | # License: RHash License <http://rhash.sourceforge.net/license.php> |
| 95 | # |
| 96 | # - uniq (GNU coreutils) 8.30 |
| 97 | # Copyright (C) 2018 Free Software Foundation, Inc. |
| 98 | # License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>. |
| 99 | # This is free software: you are free to change and redistribute it. |
| 100 | # There is NO WARRANTY, to the extent permitted by law. |
| 101 | # Written by Richard M. Stallman and David MacKenzie. |
| 102 | # |
| 103 | # - cut (GNU coreutils) 8.30 |
| 104 | # Copyright (C) 2018 Free Software Foundation, Inc. |
| 105 | # License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>. |
| 106 | # This is free software: you are free to change and redistribute it. |
| 107 | # There is NO WARRANTY, to the extent permitted by law. |
| 108 | # Written by David M. Ihnat, David MacKenzie, and Jim Meyering. |
| 109 | # |
| 110 | # - cat (GNU coreutils) 8.30 |
| 111 | # Copyright (C) 2018 Free Software Foundation, Inc. |
| 112 | # License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>. |
| 113 | # This is free software: you are free to change and redistribute it. |
| 114 | # There is NO WARRANTY, to the extent permitted by law. |
| 115 | # Written by Torbjorn Granlund and Richard M. Stallman. |
| 116 | # |
| 117 | # - GNU bash, version 5.0.3(1)-release (x86_64-pc-linux-gnu) |
| 118 | # Copyright (C) 2019 Free Software Foundation, Inc. |
| 119 | # License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html> |
| 120 | # This is free software; you are free to change and redistribute it. |
| 121 | # There is NO WARRANTY, to the extent permitted by law. |