#!/bin/bash
# Origin: zdv2.bktei.com Git - BK-2020-03.git/blob - unitproc/bkfind
#
# Date: 2020-01-20T17:08Z
#
# Author: Steven Baltakatei Sandoval (baltakatei.com)
#
# License: This bash script, `bkfind`, is licensed under GPLv3 or
# later by Steven Baltakatei Sandoval:
#
#    `bkfind`, a duplicate file finder
#    Copyright (C) 2020  Steven Baltakatei Sandoval (baltakatei.com)
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    A copy of the GNU General Public License may be found at
#    <https://www.gnu.org/licenses/>.
#
# Description: This is a script that searches a specified directory
# for files with a file name containing a specified string. It works
# as follows:
#
#   - Search specified directory tree for files that have filenames
#     that contain the specified file's filename. List groups of files
#     sharing the same hash first then list files with unique hashes.
#
# Usage: bkfind DIR FILE
#   DIR   directory tree to search
#   FILE  file whose base name is the (case-insensitive) search string
#
# Dependencies: find, rhash, sort, uniq, cut, basename, bash. See end
# of file for version details.
#
# Platform noted by the author:
#   - GNU/Linux Debian 10

# Abort on use of an unset variable. `set -e` is deliberately not
# enabled: `find` exits non-zero when it meets an unreadable
# subdirectory even though the results it did produce are still usable.
set -u

#====Configuration====
# Hash algorithm passed to rhash as `--<type>`.
readonly RHASH_HASH_TYPE="sha512"

# The number of characters returned by the chosen hash function
# (ex: `rhash --sha512 {}` produces 512/4=128 hexadecimal chars).
readonly HASH_CHAR_LENGTH=$(( 512 / 4 ))

# Number of trailing hash characters kept in the displayed output.
# NOTE(review): the original definition of HASH_DISP_LENGTH is not
# visible in this excerpt; 8 is a placeholder -- confirm the intended
# value against the original file.
readonly HASH_DISP_LENGTH=8

# Character position before which characters are dropped from each
# line with `cut`, leaving the last HASH_DISP_LENGTH hash characters
# followed by the rest of the line.
readonly CUT_POSITION=$(( 1 + HASH_CHAR_LENGTH - HASH_DISP_LENGTH ))

# Print a short usage message to stderr.
usage() {
  printf 'Usage: %s DIR FILE\n' "${0##*/}" >&2
}

# Backslash-escape the glob metacharacters (\ * ? [ ]) in $1 so the
# string can be embedded in a `find -iname` pattern and match literally.
# Outputs: escaped string on stdout (no trailing newline).
escape_glob() {
  local text=$1
  text=${text//\\/\\\\}   # backslash first, so later escapes are not doubled
  text=${text//\*/\\*}
  text=${text//\?/\\?}
  text=${text//\[/\\[}
  text=${text//\]/\\]}
  printf '%s' "$text"
}

#====Files with duplicate hashes====
# Read "<hash>  <path>" lines on stdin; write only the lines whose hash
# occurs more than once, with a blank line between hash groups and the
# hash truncated to its last HASH_DISP_LENGTH characters.
format_duplicates() {
  # GNU uniq rejects --group combined with -D, hence two uniq passes.
  sort \
    | uniq -D --check-chars="$HASH_CHAR_LENGTH" \
    | uniq --check-chars="$HASH_CHAR_LENGTH" --group \
    | cut --characters="${CUT_POSITION}-"
}

#====Files with unique hashes====
# Read "<hash>  <path>" lines on stdin; write only the lines whose hash
# occurs exactly once, with the hash truncated to its last
# HASH_DISP_LENGTH characters.
format_uniques() {
  sort \
    | uniq --unique --check-chars="$HASH_CHAR_LENGTH" \
    | cut --characters="${CUT_POSITION}-"
}

# Entry point.
# Arguments: $1 - specified directory, $2 - specified file
# Outputs:   duplicate groups, then unique files, on stdout;
#            diagnostics on stderr
# Returns:   0 on success, 2 on bad arguments, 127 if rhash is missing
main() {
  if (( $# < 2 )) || [[ -z "$2" ]]; then
    usage
    return 2
  fi

  local search_dir=$1
  local name_base name_pattern hashlist duplicates uniques

  if [[ ! -d "$search_dir" ]]; then
    printf '%s: not a directory: %s\n' "${0##*/}" "$search_dir" >&2
    return 2
  fi
  # Keep find from parsing a directory such as "-foo" as an option.
  if [[ "$search_dir" == -* ]]; then
    search_dir="./$search_dir"
  fi
  if ! command -v rhash >/dev/null 2>&1; then
    printf '%s: required command not found: rhash\n' "${0##*/}" >&2
    return 127
  fi

  # Strip path information from provided file name.
  name_base=$(basename -- "$2")
  name_pattern="*$(escape_glob "$name_base")*"

  # Generate list of hashes and filepaths. `{} +` hands many files to
  # each rhash process instead of starting one process per file.
  hashlist=$(find "$search_dir" -type f -iname "$name_pattern" \
    -exec rhash "--${RHASH_HASH_TYPE}" {} +)

  if [[ -z "$hashlist" ]]; then
    printf '%s: no files matching "%s" under %s\n' \
      "${0##*/}" "$name_base" "$search_dir" >&2
    return 0
  fi

  # printf '%s\n' is used rather than `echo -e` so that backslashes in
  # file paths are passed through unchanged.
  duplicates=$(printf '%s\n' "$hashlist" | format_duplicates)
  uniques=$(printf '%s\n' "$hashlist" | format_uniques)

  printf '%s\n' "$duplicates"
  # NOTE(review): the statement that prints the unique list is not
  # visible in this excerpt; it is emitted here because the header
  # description says unique-hash files are listed after the duplicate
  # groups -- confirm against the original file.
  printf '%s\n' "$uniques"
}

#====Dependency details (as recorded by the original author)====
#     - find (GNU findutils) 4.6.0.225-235f
#       Copyright (C) 2019 Free Software Foundation, Inc.
#       License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>.
#       Written by Eric B. Decker, James Youngman, and Kevin Dalley.
#
#     - rhash (version line not visible in this excerpt)
#       License: RHash License <http://rhash.sourceforge.net/license.php>
#
#     - uniq (GNU coreutils) 8.30
#       Copyright (C) 2018 Free Software Foundation, Inc.
#       License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>.
#       This is free software: you are free to change and redistribute it.
#       There is NO WARRANTY, to the extent permitted by law.
#       Written by Richard M. Stallman and David MacKenzie.
#
#     - cut (GNU coreutils) 8.30
#       Copyright (C) 2018 Free Software Foundation, Inc.
#       License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>.
#       This is free software: you are free to change and redistribute it.
#       There is NO WARRANTY, to the extent permitted by law.
#       Written by David M. Ihnat, David MacKenzie, and Jim Meyering.
#
#     - cat (GNU coreutils) 8.30
#       (listed by the original author; this version does not call cat)
#       Copyright (C) 2018 Free Software Foundation, Inc.
#       License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>.
#       This is free software: you are free to change and redistribute it.
#       There is NO WARRANTY, to the extent permitted by law.
#       Written by Torbjorn Granlund and Richard M. Stallman.
#
#     - GNU bash, version 5.0.3(1)-release (x86_64-pc-linux-gnu)
#       Copyright (C) 2019 Free Software Foundation, Inc.
#       License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
#       This is free software; you are free to change and redistribute it.
#       There is NO WARRANTY, to the extent permitted by law.

main "$@"